Commit 1ba7918c authored by Mael NICOLAS's avatar Mael NICOLAS

Merge branch 'servant-update' into 'dev'

Servant update

See merge request gargantext/crawlers/hal!3
parents 8e6da01e 1602ca38
......@@ -6,14 +6,36 @@ import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client
import HAL (runStructureRequest)
import HAL.Client
import HAL.Doc
import Tree
main :: IO ()
main = do
manager' <- newManager tlsManagerSettings
res <- runClientM
(search ["docid"] ["docType_s:(THESE OR HDR)"] (asc "docid") (Just 1) (Just 2))
(mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
res <- runStructureRequest $ Just "parentDocid_i:302102"
case res of
(Left err) -> print err
(Right val) -> print val
{-
main :: IO ()
main = do
rootRes <- runHalAPIClient $ structure (Just $ fqRootDoc) (Just "docid, parentDocid_i, label_s") (Just 10000)
case rootRes of
(Left err) -> print err
(Right val) -> do
childrenRes <- runHalAPIClient $ structure (Just $ fqParentDoc) (Just "docid, parentDocid_i, label_s") (Just 10000)
case childrenRes of
(Left err2) -> print err2
(Right val2) -> do
children <- fetchChildren $ val2 ^. docs
let trees = buildTree 0 children <$> (val ^. docs)
let noDuplicateTrees = noDuplicateTree <$> trees
mapM_ putStrLn $ formatTree <$> noDuplicateTrees
where fqParentDoc =
"parentDocid_i:(302102 || 469216 || 6279 || 224096 || 144103 || 497330 || 84538 || 301262 || 481355 || 29212 || 301442 || 542824 || 300362 || 352124 || 300104 || 421532 || 301492)"
fqRootDoc =
"docid:(302102 || 469216 || 6279 || 224096 || 144103 || 497330 || 84538 || 301262 || 481355 || 29212 || 301442 || 542824 || 300362 || 352124 || 300104 || 421532 || 301492)"
-}
-- This file has been generated from package.yaml by hpack version 0.28.2.
--
-- see: https://github.com/sol/hpack
--
-- hash: 9634b793447cd493f4caaa2fa593379ae858856de45a2b71bb012af23e3c40f1
name: crawlerHAL
version: 0.1.0.0
description: Please see the README at <https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal>
homepage: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal#readme
bug-reports: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal/issues
author: CNRS/IMT
maintainer: contact@gargantext.org
copyright: 2019 CNRS/IMT
license: BSD3
license-file: LICENSE
build-type: Simple
cabal-version: >= 1.10
extra-source-files:
ChangeLog.md
README.md
source-repository head
type: git
location: https://github.com/https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal
library
exposed-modules:
HAL
HAL.Client
HAL.Doc
Tree
other-modules:
Paths_crawlerHAL
hs-source-dirs:
src
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, data-default
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
executable crawlerHAL-exe
main-is: Main.hs
other-modules:
Paths_crawlerHAL
hs-source-dirs:
app
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, crawlerHAL
, data-default
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
test-suite halCrawler-test
type: exitcode-stdio-1.0
main-is: Spec.hs
other-modules:
Paths_crawlerHAL
hs-source-dirs:
test
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, base >=4.7 && <5
, bytestring
, containers
, data-default
, halCrawler
, http-client
, http-client-tls
, lens
, scientific
, servant
, servant-client
, split
, text
, utf8-string
, vector
default-language: Haskell2010
name: halCrawler
name: crawlerHAL
version: 0.1.0.0
github: "githubuser/halCrawler"
github: "https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal"
license: BSD3
author: "Author name here"
maintainer: "example@example.com"
copyright: "2019 Author name here"
author: "CNRS/IMT"
maintainer: "contact@gargantext.org"
copyright: "2019 CNRS/IMT"
extra-source-files:
- README.md
......@@ -17,7 +17,7 @@ extra-source-files:
# To avoid duplicated efforts in documentation and dealing with the
# complications of embedding Haddock markup inside cabal files, it is
# common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/halCrawler#readme>
description: Please see the README at <https://git@gitlab.iscpif.fr:20022/gargantext/crawlers/hal>
dependencies:
- base >= 4.7 && < 5
......@@ -34,12 +34,13 @@ dependencies:
- split
- scientific
- vector
- data-default
library:
source-dirs: src
executables:
halCrawler-exe:
crawlerHAL-exe:
main: Main.hs
source-dirs: app
ghc-options:
......@@ -47,7 +48,7 @@ executables:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- halCrawler
- crawlerHAL
tests:
halCrawler-test:
......
{-# LANGUAGE OverloadedStrings #-}
module HAL where
import Data.Default (def)
import Data.Text
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM, mkClientEnv)
import HAL.Client
import HAL.Doc
runHalAPIClient :: ClientM (Response Doc) -> IO (Either ClientError (Response Doc))
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Doc))
runStructureRequest rq =
runHalAPIClient $ structure def rq (Just 10000)
runSearchRequest :: [Text] -> IO (Either ClientError (Response Doc))
runSearchRequest rq =
runHalAPIClient $ search def rq Nothing Nothing Nothing
......@@ -3,6 +3,7 @@
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE NoMonomorphismRestriction#-}
module HAL.Client where
......@@ -11,6 +12,7 @@ import GHC.Generics
import Servant.API
import Servant.Client hiding (Response)
import Data.Text
import Data.Map
import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
......@@ -18,17 +20,12 @@ import qualified Codec.Binary.UTF8.String as UTF
import Control.Lens as L (makeLenses)
type HALAPI = Search :<|> Structure
type Structure = "ref" :> "structure"
:> QueryParam "fq" Text
:> QueryParam "fl" Text
:> QueryParam "rows" Int
:> Get '[JSON] Response
type HALAPI doc = Search doc
:<|> Structure doc
type Search = "search"
type Search doc = "search"
-- fl determine which fields will be returned it can be a list of fields or *
:> QueryParams "fl" Text
:> QueryParam "fl" doc
-- TODO: type this monster
-- fq is to filter request
:> QueryParams "fq" Text
......@@ -38,7 +35,13 @@ type Search = "search"
:> QueryParam "start" Int
-- use rows to make the request only return the x number of result
:> QueryParam "rows" Int
:> Get '[JSON] Response
:> Get '[JSON] (Response doc)
type Structure doc = "ref" :> "structure"
:> QueryParam "fl" doc
:> QueryParam "fq" Text
:> QueryParam "rows" Int
:> Get '[JSON] (Response doc)
-- Get's argument type
data SortField = Asc Text | Desc Text
......@@ -53,49 +56,52 @@ asc = Just . Asc
desc :: Text -> Maybe SortField
desc = Just . Desc
newtype Doc = Doc (Map Text Value)
deriving (Generic)
instance Eq Doc where
(==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid")
instance Show Doc where
show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s")
<> "("
<> (show . encode $ o ! "docid")
<> ")"
instance FromJSON Doc
instance ToJSON Doc
-- newtype Doc = Doc (Map Text Value)
-- deriving (Generic)
--
--instance Eq Doc where
-- (==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid")
--
--instance Show Doc where
-- show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s")
-- <> "("
-- <> (show . encode $ o ! "docid")
-- <> ")"
--
--instance FromJSON Doc
--instance ToJSON Doc
-- Response type
data Response = Response
data Response doc = Response
{
_numFound :: Integer,
_start :: Int,
_docs :: [Doc]
_docs :: [doc]
} deriving (Show, Generic)
L.makeLenses ''Response
instance FromJSON Response where
instance FromJSON doc => FromJSON (Response doc) where
parseJSON (Object o) = Response <$>
((o .: "response") >>= (.: "numFound"))
<*> ((o .: "response") >>= (.: "start"))
<*> ((o .: "response") >>= (.: "docs"))
halAPI :: Proxy HALAPI
halAPI :: Proxy (HALAPI doc)
halAPI = Proxy
structure :: Maybe Text -- fq
-> Maybe Text
-> Maybe Int -- rows
-> ClientM Response
search :: [Text] -- fl
-- search should always have at least `docid` and `label_s` in his fl params
search :: (FromJSON doc, ToHttpApiData doc) =>
Maybe doc -- fl
-> [Text] -- fq
-> Maybe SortField -- sort
-> Maybe Int -- start
-> Maybe Int -- rows
-> ClientM Response
-> ClientM (Response doc)
structure :: (FromJSON doc, ToHttpApiData doc) =>
Maybe doc
-> Maybe Text -- fq
-> Maybe Int -- rows
-> ClientM (Response doc)
(search :<|> structure) = client halAPI
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
module HAL.Doc where
import GHC.Generics
import Data.Aeson ((.:), (.:?), (.!=), Value(..), ToJSON, FromJSON(..), encode)
import Data.Default
import Data.Text (pack, Text)
import Servant.API (ToHttpApiData(..))
data Doc = Doc
{
_docid :: Int,
_label_s :: Maybe Text,
_parentDocid_i :: [Text]
} deriving (Generic)
instance Default Doc where
def = Doc def def def
instance FromJSON Doc where
parseJSON (Object o) = Doc <$>
(o .: "docid")
<*> (o .:? "label_s")
<*> (o .:? "parentDocid_i" .!= [])
instance ToHttpApiData Doc where
toUrlPiece _ = "docid,label_s,parentDocid_i"
instance Show Doc where
show (Doc id label _) = show label
<> "(" <> show id <> ")"
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
module Tree where
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client hiding (Response)
......@@ -13,8 +11,6 @@ import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Text (pack, Text)
import Data.Aeson (Value(..), ToJSON, encode)
import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf)
import Data.Either (rights)
......@@ -24,34 +20,17 @@ import qualified Data.Vector as V
import Text.Printf
import HAL.Client
runHalAPIClient :: ClientM Response -> IO (Either ServantError Response)
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
docid :: Doc -> Value
docid (Doc doc) = doc ! "docid"
import HAL.Doc
{-
formatParentIdRequest :: [Doc] -> Maybe Text
formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ docid x
formatParentIdRequest (x:[]) = Just . pack . show $ _docid x
formatParentIdRequest (x:xs) =
(Just . pack . show $ docid x)
(Just . pack . show $ _docid x)
<> (Just " || ")
<> formatParentIdRequest xs
runStructureRequest :: Maybe Text -> IO (Either ServantError Response)
runStructureRequest rq =
runHalAPIClient $ structure rq (Just "docid, parentDocid_i, label_s") (Just 10000)
scientific2text :: Scientific -> Text
scientific2text n =
case floatingOrInteger n of
Left r -> pack $ show (r :: Double)
Right i -> pack $ show (i :: Integer)
ds2Child :: [Doc] -> IO [Doc]
ds2Child ds = do
rs <- sequence $ runStructureRequest <$> formatedRequests ds
......@@ -64,9 +43,9 @@ fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Doc -> Doc -> Bool
isChildOf (Doc d) (Doc d') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = (d ! "docid")
(Array parentDocIds) = (d' ! "parentDocid_i")
isChildOf (Doc i l) (Doc i' l') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = d ! "docid"
(Array parentDocIds) = d' ! "parentDocid_i"
data DocTree = DocTree Doc Int [DocTree]
deriving (Show, Generic)
......@@ -101,3 +80,5 @@ removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (r
noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree
-}
......@@ -17,7 +17,7 @@
#
# resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml
resolver: lts-13.10
resolver: lts-14.1
# User packages to be built.
# Various formats can be used as shown in the example below.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment