Commit 8e6da01e authored by Mael NICOLAS's avatar Mael NICOLAS

add /structures + some utilities to construct trees

parent 32b49236
......@@ -28,6 +28,12 @@ dependencies:
- http-client
- text
- containers
- lens
- bytestring
- utf8-string
- split
- scientific
- vector
library:
source-dirs: src
......
......@@ -2,6 +2,7 @@
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module HAL.Client where
......@@ -12,8 +13,18 @@ import Servant.Client hiding (Response)
import Data.Text
import Data.Map
import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Codec.Binary.UTF8.String as UTF
type HALAPI = Search
import Control.Lens as L (makeLenses)
type HALAPI = Search :<|> Structure
type Structure = "ref" :> "structure"
:> QueryParam "fq" Text
:> QueryParam "fl" Text
:> QueryParam "rows" Int
:> Get '[JSON] Response
type Search = "search"
-- fl determine which fields will be returned it can be a list of fields or *
......@@ -42,13 +53,30 @@ asc = Just . Asc
desc :: Text -> Maybe SortField
desc = Just . Desc
newtype Doc = Doc (Map Text Value)
deriving (Generic)
instance Eq Doc where
(==) (Doc doc) (Doc doc') = (doc ! "docid") == (doc' ! "docid")
instance Show Doc where
show (Doc o) = (UTF.decode $ BSL.unpack $ encode $ o ! "label_s")
<> "("
<> (show . encode $ o ! "docid")
<> ")"
instance FromJSON Doc
instance ToJSON Doc
-- Response type
data Response = Response
{
numFound :: Integer,
start :: Int,
docs :: [Doc]
_numFound :: Integer,
_start :: Int,
_docs :: [Doc]
} deriving (Show, Generic)
L.makeLenses ''Response
instance FromJSON Response where
parseJSON (Object o) = Response <$>
......@@ -56,17 +84,18 @@ instance FromJSON Response where
<*> ((o .: "response") >>= (.: "start"))
<*> ((o .: "response") >>= (.: "docs"))
newtype Doc = Doc (Map Text Value)
deriving (Show, Generic)
instance FromJSON Doc
halAPI :: Proxy HALAPI
halAPI = Proxy
search :: [Text]
-> [Text]
-> Maybe SortField
-> Maybe Int
-> Maybe Int
structure :: Maybe Text -- fq
-> Maybe Text
-> Maybe Int -- rows
-> ClientM Response
search :: [Text] -- fl
-> [Text] -- fq
-> Maybe SortField -- sort
-> Maybe Int -- start
-> Maybe Int -- rows
-> ClientM Response
search = client halAPI
(search :<|> structure) = client halAPI
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
module Tree where
import GHC.Generics
import Network.HTTP.Client (newManager)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant.Client hiding (Response)
import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Text (pack, Text)
import Data.Aeson (Value(..), ToJSON, encode)
import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf)
import Data.Either (rights)
import Data.Maybe (fromMaybe)
import Data.Scientific (Scientific, floatingOrInteger)
import qualified Data.Vector as V
import Text.Printf
import HAL.Client
runHalAPIClient :: ClientM Response -> IO (Either ServantError Response)
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
docid :: Doc -> Value
docid (Doc doc) = doc ! "docid"
formatParentIdRequest :: [Doc] -> Maybe Text
formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ docid x
formatParentIdRequest (x:xs) =
(Just . pack . show $ docid x)
<> (Just " || ")
<> formatParentIdRequest xs
runStructureRequest :: Maybe Text -> IO (Either ServantError Response)
runStructureRequest rq =
runHalAPIClient $ structure rq (Just "docid, parentDocid_i, label_s") (Just 10000)
scientific2text :: Scientific -> Text
scientific2text n =
case floatingOrInteger n of
Left r -> pack $ show (r :: Double)
Right i -> pack $ show (i :: Integer)
ds2Child :: [Doc] -> IO [Doc]
ds2Child ds = do
rs <- sequence $ runStructureRequest <$> formatedRequests ds
return . concat $ (^. docs) <$> rights rs
where formatedRequest docs' = Just "parentDocid_i:(" <> formatParentIdRequest docs' <> Just ")"
formatedRequests docs'' = formatedRequest <$> chunksOf 100 docs''
fetchChildren :: [Doc] -> IO [Doc]
fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Doc -> Doc -> Bool
isChildOf (Doc d) (Doc d') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = (d ! "docid")
(Array parentDocIds) = (d' ! "parentDocid_i")
data DocTree = DocTree Doc Int [DocTree]
deriving (Show, Generic)
instance ToJSON DocTree
buildTree :: Int -> [Doc] -> Doc -> DocTree
buildTree depth docs id = DocTree id depth (buildTree (depth + 1) docs <$> children)
where children = filter (isChildOf id) docs
formatTree :: DocTree -> String
formatTree tree@(DocTree doc depth children) =
printf "%s%s\n%s" (addSpace) (show doc) (concat $ formatTree <$> children)
where addSpace
| depth > 0 = "├" <> (replicate (depth * depth + depth) '─')
| otherwise = "🌲"
findDeepest :: Map Scientific Int -> DocTree -> Map Scientific Int
findDeepest m tree@(DocTree doc depth children) =
mergeMap map1 maps
where map1 = insert docid' depth m
maps = foldl mergeMap empty $ findDeepest m <$> children
mergeMap m1 m2 = merge preserveMissing preserveMissing (zipWithMatched whenMatch) m1 m2
whenMatch k v v' = if v > v' then v else v'
(Number docid') = docid doc
isDeep :: Map Scientific Int -> DocTree -> Bool
isDeep m (DocTree i depth _) = depth >= (fromMaybe 0 $ M.lookup id m)
where (Number id) = docid i
removeDuplicate :: Map Scientific Int -> DocTree -> DocTree
removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (removeDuplicate deepMap <$> filter (isDeep deepMap) children)
noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment