Commit 1ba7918c authored by Mael NICOLAS's avatar Mael NICOLAS

Merge branch 'servant-update' into 'dev'

Servant update

See merge request !3
parents 8e6da01e 1602ca38
...@@ -6,14 +6,36 @@ import Network.HTTP.Client (newManager) ...@@ -6,14 +6,36 @@ import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client import Servant.Client
import HAL (runStructureRequest)
import HAL.Client import HAL.Client
import HAL.Doc
import Tree
main :: IO () main :: IO ()
main = do main = do
manager' <- newManager tlsManagerSettings manager' <- newManager tlsManagerSettings
res <- runClientM res <- runStructureRequest $ Just "parentDocid_i:302102"
(search ["docid"] ["docType_s:(THESE OR HDR)"] (asc "docid") (Just 1) (Just 2))
(mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
case res of case res of
(Left err) -> print err (Left err) -> print err
(Right val) -> print val (Right val) -> print val
{-
main :: IO ()
main = do
rootRes <- runHalAPIClient $ structure (Just $ fqRootDoc) (Just "docid, parentDocid_i, label_s") (Just 10000)
case rootRes of
(Left err) -> print err
(Right val) -> do
childrenRes <- runHalAPIClient $ structure (Just $ fqParentDoc) (Just "docid, parentDocid_i, label_s") (Just 10000)
case childrenRes of
(Left err2) -> print err2
(Right val2) -> do
children <- fetchChildren $ val2 ^. docs
let trees = buildTree 0 children <$> (val ^. docs)
let noDuplicateTrees = noDuplicateTree <$> trees
mapM_ putStrLn $ formatTree <$> noDuplicateTrees
where fqParentDoc =
"parentDocid_i:(302102 || 469216 || 6279 || 224096 || 144103 || 497330 || 84538 || 301262 || 481355 || 29212 || 301442 || 542824 || 300362 || 352124 || 300104 || 421532 || 301492)"
fqRootDoc =
"docid:(302102 || 469216 || 6279 || 224096 || 144103 || 497330 || 84538 || 301262 || 481355 || 29212 || 301442 || 542824 || 300362 || 352124 || 300104 || 421532 || 301492)"
-}
-- This file has been generated from package.yaml by hpack version 0.28.2.
--
-- see: https://github.com/sol/hpack
--
-- hash: 9634b793447cd493f4caaa2fa593379ae858856de45a2b71bb012af23e3c40f1
name: crawlerHAL
version: 0.1.0.0
description: Please see the README at <https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal>
homepage: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal#readme
bug-reports: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal/issues
author: CNRS/IMT
maintainer: contact@gargantext.org
copyright: 2019 CNRS/IMT
license: BSD3
license-file: LICENSE
build-type: Simple
cabal-version: >= 1.10
extra-source-files:
ChangeLog.md
README.md
source-repository head
type: git
location: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal
library
exposed-modules:
HAL
HAL.Client
HAL.Doc
Tree
other-modules:
Paths_crawlerHAL
hs-source-dirs:
src
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, data-default
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
executable crawlerHAL-exe
main-is: Main.hs
other-modules:
Paths_crawlerHAL
hs-source-dirs:
app
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, crawlerHAL
, data-default
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
test-suite halCrawler-test
type: exitcode-stdio-1.0
main-is: Spec.hs
other-modules:
Paths_crawlerHAL
hs-source-dirs:
test
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, data-default
, halCrawler
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
name: halCrawler name: crawlerHAL
version: 0.1.0.0 version: 0.1.0.0
github: "githubuser/halCrawler" github: "https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal"
license: BSD3 license: BSD3
author: "Author name here" author: "CNRS/IMT"
maintainer: "example@example.com" maintainer: "contact@gargantext.org"
copyright: "2019 Author name here" copyright: "2019 CNRS/IMT"
extra-source-files: extra-source-files:
- README.md - README.md
...@@ -17,7 +17,7 @@ extra-source-files: ...@@ -17,7 +17,7 @@ extra-source-files:
# To avoid duplicated efforts in documentation and dealing with the # To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is # complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file. # common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/halCrawler#readme> description: Please see the README at <https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal>
dependencies: dependencies:
- base >= 4.7 && < 5 - base >= 4.7 && < 5
...@@ -34,12 +34,13 @@ dependencies: ...@@ -34,12 +34,13 @@ dependencies:
- split - split
- scientific - scientific
- vector - vector
- data-default
library: library:
source-dirs: src source-dirs: src
executables: executables:
halCrawler-exe: crawlerHAL-exe:
main: Main.hs main: Main.hs
source-dirs: app source-dirs: app
ghc-options: ghc-options:
...@@ -47,7 +48,7 @@ executables: ...@@ -47,7 +48,7 @@ executables:
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- halCrawler - crawlerHAL
tests: tests:
halCrawler-test: halCrawler-test:
......
{-# LANGUAGE OverloadedStrings #-}
module HAL where
import Data.Default (def)
import Data.Text
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv)
import HAL.Client
import HAL.Doc
runHalAPIClient :: ClientM (Response Doc) -> IO (Either ClientError (Response Doc))
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Doc))
runStructureRequest rq =
runHalAPIClient $ structure def rq (Just 10000)
runSearchRequest :: [Text] -> IO (Either ClientError (Response Doc))
runSearchRequest rq =
runHalAPIClient $ search def rq Nothing Nothing Nothing
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE NoMonomorphismRestriction#-}
module HAL.Client where module HAL.Client where
...@@ -11,6 +12,7 @@ import GHC.Generics ...@@ -11,6 +12,7 @@ import GHC.Generics
import Servant.API import Servant.API
import Servant.Client hiding (Response) import Servant.Client hiding (Response)
import Data.Text import Data.Text
import Data.Map import Data.Map
import Data.Aeson import Data.Aeson
import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy as BSL
...@@ -18,17 +20,12 @@ import qualified Codec.Binary.UTF8.String as UTF ...@@ -18,17 +20,12 @@ import qualified Codec.Binary.UTF8.String as UTF
import Control.Lens as L (makeLenses) import Control.Lens as L (makeLenses)
type HALAPI = Search :<|> Structure type HALAPI doc = Search doc
:<|> Structure doc
type Structure = "ref" :> "structure"
:> QueryParam "fq" Text
:> QueryParam "fl" Text
:> QueryParam "rows" Int
:> Get '[JSON] Response
type Search = "search" type Search doc = "search"
-- fl determine which fields will be returned it can be a list of fields or * -- fl determine which fields will be returned it can be a list of fields or *
:> QueryParams "fl" Text :> QueryParam "fl" doc
-- TODO: type this monster -- TODO: type this monster
-- fq is to filter request -- fq is to filter request
:> QueryParams "fq" Text :> QueryParams "fq" Text
...@@ -38,7 +35,13 @@ type Search = "search" ...@@ -38,7 +35,13 @@ type Search = "search"
:> QueryParam "start" Int :> QueryParam "start" Int
-- use rows to make the request only return the x number of result -- use rows to make the request only return the x number of result
:> QueryParam "rows" Int :> QueryParam "rows" Int
:> Get '[JSON] Response :> Get '[JSON] (Response doc)
type Structure doc = "ref" :> "structure"
:> QueryParam "fl" doc
:> QueryParam "fq" Text
:> QueryParam "rows" Int
:> Get '[JSON] (Response doc)
-- Get's argument type -- Get's argument type
data SortField = Asc Text | Desc Text data SortField = Asc Text | Desc Text
...@@ -53,49 +56,52 @@ asc = Just . Asc ...@@ -53,49 +56,52 @@ asc = Just . Asc
desc :: Text -> Maybe SortField desc :: Text -> Maybe SortField
desc = Just . Desc desc = Just . Desc
newtype Doc = Doc (Map Text Value) -- newtype Doc = Doc (Map Text Value)
deriving (Generic) -- deriving (Generic)
--
instance Eq Doc where --instance Eq Doc where
(==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid") -- (==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid")
--
instance Show Doc where --instance Show Doc where
show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s") -- show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s")
<> "(" -- <> "("
<> (show . encode $ o ! "docid") -- <> (show . encode $ o ! "docid")
<> ")" -- <> ")"
--
instance FromJSON Doc --instance FromJSON Doc
instance ToJSON Doc --instance ToJSON Doc
-- Response type -- Response type
data Response = Response data Response doc = Response
{ {
_numFound :: Integer, _numFound :: Integer,
_start :: Int, _start :: Int,
_docs :: [Doc] _docs :: [doc]
} deriving (Show, Generic) } deriving (Show, Generic)
L.makeLenses ''Response L.makeLenses ''Response
instance FromJSON doc => FromJSON (Response doc) where
instance FromJSON Response where
parseJSON (Object o) = Response <$> parseJSON (Object o) = Response <$>
((o .: "response") >>= (.: "numFound")) ((o .: "response") >>= (.: "numFound"))
<*> ((o .: "response") >>= (.: "start")) <*> ((o .: "response") >>= (.: "start"))
<*> ((o .: "response") >>= (.: "docs")) <*> ((o .: "response") >>= (.: "docs"))
halAPI :: Proxy (HALAPI doc)
halAPI :: Proxy HALAPI
halAPI = Proxy halAPI = Proxy
structure :: Maybe Text -- fq -- search should always have at least `docid` and `label_s` in his fl params
-> Maybe Text search :: (FromJSON doc, ToHttpApiData doc) =>
-> Maybe Int -- rows Maybe doc -- fl
-> ClientM Response
search :: [Text] -- fl
-> [Text] -- fq -> [Text] -- fq
-> Maybe SortField -- sort -> Maybe SortField -- sort
-> Maybe Int -- start -> Maybe Int -- start
-> Maybe Int -- rows -> Maybe Int -- rows
-> ClientM Response -> ClientM (Response doc)
structure :: (FromJSON doc, ToHttpApiData doc) =>
Maybe doc
-> Maybe Text -- fq
-> Maybe Int -- rows
-> ClientM (Response doc)
(search :<|> structure) = client halAPI (search :<|> structure) = client halAPI
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module HAL.Doc where
import GHC.Generics
import Data.Aeson ((.:), (.:?), (.!=), Value(..), ToJSON, FromJSON(..), encode)
import Data.Default
import Data.Text (pack, Text)
import Servant.API (ToHttpApiData(..))
data Doc = Doc
{
_docid :: Int,
_label_s :: Maybe Text,
_parentDocid_i :: [Text]
} deriving (Generic)
instance Default Doc where
def = Doc def def def
instance FromJSON Doc where
parseJSON (Object o) = Doc <$>
(o .: "docid")
<*> (o .:? "label_s")
<*> (o .:? "parentDocid_i" .!= [])
instance ToHttpApiData Doc where
toUrlPiece _ = "docid,label_s,parentDocid_i"
instance Show Doc where
show (Doc id label _) = show label
<> "(" <> show id <> ")"
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Tree where module Tree where
import GHC.Generics
import Network.HTTP.Client (newManager) import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client hiding (Response) import Servant.Client hiding (Response)
...@@ -13,8 +11,6 @@ import Control.Lens.Getter ((^.)) ...@@ -13,8 +11,6 @@ import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList) import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched) import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Text (pack, Text)
import Data.Aeson (Value(..), ToJSON, encode)
import Data.List (groupBy, isInfixOf) import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf) import Data.List.Split (chunksOf)
import Data.Either (rights) import Data.Either (rights)
...@@ -24,34 +20,17 @@ import qualified Data.Vector as V ...@@ -24,34 +20,17 @@ import qualified Data.Vector as V
import Text.Printf import Text.Printf
import HAL.Client import HAL.Client
import HAL.Doc
runHalAPIClient :: ClientM Response -> IO (Either ServantError Response) {-
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
docid :: Doc -> Value
docid (Doc doc) = doc ! "docid"
formatParentIdRequest :: [Doc] -> Maybe Text formatParentIdRequest :: [Doc] -> Maybe Text
formatParentIdRequest [] = Nothing formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ docid x formatParentIdRequest (x:[]) = Just . pack . show $ _docid x
formatParentIdRequest (x:xs) = formatParentIdRequest (x:xs) =
(Just . pack . show $ docid x) (Just . pack . show $ _docid x)
<> (Just " || ") <> (Just " || ")
<> formatParentIdRequest xs <> formatParentIdRequest xs
runStructureRequest :: Maybe Text -> IO (Either ServantError Response)
runStructureRequest rq =
runHalAPIClient $ structure rq (Just "docid, parentDocid_i, label_s") (Just 10000)
scientific2text :: Scientific -> Text
scientific2text n =
case floatingOrInteger n of
Left r -> pack $ show (r :: Double)
Right i -> pack $ show (i :: Integer)
ds2Child :: [Doc] -> IO [Doc] ds2Child :: [Doc] -> IO [Doc]
ds2Child ds = do ds2Child ds = do
rs <- sequence $ runStructureRequest <$> formatedRequests ds rs <- sequence $ runStructureRequest <$> formatedRequests ds
...@@ -64,9 +43,9 @@ fetchChildren [] = pure [] ...@@ -64,9 +43,9 @@ fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds) fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Doc -> Doc -> Bool isChildOf :: Doc -> Doc -> Bool
isChildOf (Doc d) (Doc d') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds) isChildOf (Doc i l) (Doc i' l') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = (d ! "docid") where (Number docId ) = d ! "docid"
(Array parentDocIds) = (d' ! "parentDocid_i") (Array parentDocIds) = d' ! "parentDocid_i"
data DocTree = DocTree Doc Int [DocTree] data DocTree = DocTree Doc Int [DocTree]
deriving (Show, Generic) deriving (Show, Generic)
...@@ -101,3 +80,5 @@ removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (r ...@@ -101,3 +80,5 @@ removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (r
noDuplicateTree :: DocTree -> DocTree noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree
-}
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
# #
# resolver: ./custom-snapshot.yaml # resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml # resolver: https://example.com/snapshots/2018-01-01.yaml
resolver: lts-13.10 resolver: lts-14.1
# User packages to be built. # User packages to be built.
# Various formats can be used as shown in the example below. # Various formats can be used as shown in the example below.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment