Commit 8e6da01e authored by Mael NICOLAS's avatar Mael NICOLAS

add /structures + some utilities to construct trees

parent 32b49236
...@@ -28,6 +28,12 @@ dependencies: ...@@ -28,6 +28,12 @@ dependencies:
- http-client - http-client
- text - text
- containers - containers
- lens
- bytestring
- utf8-string
- split
- scientific
- vector
library: library:
source-dirs: src source-dirs: src
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module HAL.Client where module HAL.Client where
...@@ -12,8 +13,18 @@ import Servant.Client hiding (Response) ...@@ -12,8 +13,18 @@ import Servant.Client hiding (Response)
import Data.Text import Data.Text
import Data.Map import Data.Map
import Data.Aeson import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Codec.Binary.UTF8.String as UTF
type HALAPI = Search import Control.Lens as L (makeLenses)
type HALAPI = Search :<|> Structure
type Structure = "ref" :> "structure"
:> QueryParam "fq" Text
:> QueryParam "fl" Text
:> QueryParam "rows" Int
:> Get '[JSON] Response
type Search = "search" type Search = "search"
-- fl determine which fields will be returned it can be a list of fields or * -- fl determine which fields will be returned it can be a list of fields or *
...@@ -42,13 +53,30 @@ asc = Just . Asc ...@@ -42,13 +53,30 @@ asc = Just . Asc
desc :: Text -> Maybe SortField desc :: Text -> Maybe SortField
desc = Just . Desc desc = Just . Desc
newtype Doc = Doc (Map Text Value)
deriving (Generic)
instance Eq Doc where
(==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid")
instance Show Doc where
show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s")
<> "("
<> (show . encode $ o ! "docid")
<> ")"
instance FromJSON Doc
instance ToJSON Doc
-- Response type -- Response type
data Response = Response data Response = Response
{ {
numFound :: Integer, _numFound :: Integer,
start :: Int, _start :: Int,
docs :: [Doc] _docs :: [Doc]
} deriving (Show, Generic) } deriving (Show, Generic)
L.makeLenses ''Response
instance FromJSON Response where instance FromJSON Response where
parseJSON (Object o) = Response <$> parseJSON (Object o) = Response <$>
...@@ -56,17 +84,18 @@ instance FromJSON Response where ...@@ -56,17 +84,18 @@ instance FromJSON Response where
<*> ((o .: "response") >>= (.: "start")) <*> ((o .: "response") >>= (.: "start"))
<*> ((o .: "response") >>= (.: "docs")) <*> ((o .: "response") >>= (.: "docs"))
newtype Doc = Doc (Map Text Value)
deriving (Show, Generic)
instance FromJSON Doc
halAPI :: Proxy HALAPI halAPI :: Proxy HALAPI
halAPI = Proxy halAPI = Proxy
search :: [Text] structure :: Maybe Text -- fq
-> [Text] -> Maybe Text
-> Maybe SortField -> Maybe Int -- rows
-> Maybe Int -> ClientM Response
-> Maybe Int search :: [Text] -- fl
-> [Text] -- fq
-> Maybe SortField -- sort
-> Maybe Int -- start
-> Maybe Int -- rows
-> ClientM Response -> ClientM Response
search = client halAPI (search :<|> structure) = client halAPI
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
module Tree where
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client hiding (Response)
import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Text (pack, Text)
import Data.Aeson (Value(..), ToJSON, encode)
import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf)
import Data.Either (rights)
import Data.Maybe (fromMaybe)
import Data.Scientific (Scientific, floatingOrInteger)
import qualified Data.Vector as V
import Text.Printf
import HAL.Client
runHalAPIClient :: ClientM Response -> IO (Either ServantError Response)
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
docid :: Doc -> Value
docid (Doc doc) = doc ! "docid"
formatParentIdRequest :: [Doc] -> Maybe Text
formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ docid x
formatParentIdRequest (x:xs) =
(Just . pack . show $ docid x)
<> (Just " || ")
<> formatParentIdRequest xs
runStructureRequest :: Maybe Text -> IO (Either ServantError Response)
runStructureRequest rq =
runHalAPIClient $ structure rq (Just "docid, parentDocid_i, label_s") (Just 10000)
scientific2text :: Scientific -> Text
scientific2text n =
case floatingOrInteger n of
Left r -> pack $ show (r :: Double)
Right i -> pack $ show (i :: Integer)
ds2Child :: [Doc] -> IO [Doc]
ds2Child ds = do
rs <- sequence $ runStructureRequest <$> formatedRequests ds
return . concat $ (^. docs) <$> rights rs
where formatedRequest docs' = Just "parentDocid_i:(" <> formatParentIdRequest docs' <> Just ")"
formatedRequests docs'' = formatedRequest <$> chunksOf 100 docs''
fetchChildren :: [Doc] -> IO [Doc]
fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Doc -> Doc -> Bool
isChildOf (Doc d) (Doc d') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = (d ! "docid")
(Array parentDocIds) = (d' ! "parentDocid_i")
data DocTree = DocTree Doc Int [DocTree]
deriving (Show, Generic)
instance ToJSON DocTree
buildTree :: Int -> [Doc] -> Doc -> DocTree
buildTree depth docs id = DocTree id depth (buildTree (depth + 1) docs <$> children)
where children = filter (isChildOf id) docs
formatTree :: DocTree -> String
formatTree tree@(DocTree doc depth children) =
printf "%s%s\n%s" (addSpace) (show doc) (concat $ formatTree <$> children)
where addSpace
| depth > 0 = "├" <> (replicate (depth * depth + depth) '─')
| otherwise = "🌲"
findDeepest :: Map Scientific Int -> DocTree -> Map Scientific Int
findDeepest m tree@(DocTree doc depth children) =
mergeMap map1 maps
where map1 = insert docid' depth m
maps = foldl mergeMap empty $ findDeepest m <$> children
mergeMap m1 m2 = merge preserveMissing preserveMissing (zipWithMatched whenMatch) m1 m2
whenMatch k v v' = if v > v' then v else v'
(Number docid') = docid doc
isDeep :: Map Scientific Int -> DocTree -> Bool
isDeep m (DocTree i depth _) = depth >= (fromMaybe 0 $ M.lookup id m)
where (Number id) = docid i
removeDuplicate :: Map Scientific Int -> DocTree -> DocTree
removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (removeDuplicate deepMap <$> filter (isDeep deepMap) children)
noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment