Commit 802033bd authored by Mudada's avatar Mudada

refactored Tree

parent 9acf7ad9
......@@ -12,7 +12,10 @@ import Servant.Client (BaseUrl(..), Scheme(..), ClientM, ClientError, runClientM
import HAL.Client
import HAL.Doc.Corpus
import HAL.Doc.Struct
import Servant.API
import Data.Aeson
getMetadataWith :: Text -> Maybe Int -> IO (Either ClientError (Response Corpus))
getMetadataWith q l = do
......@@ -22,14 +25,18 @@ getMetadataWith q l = do
requestedFields :: Text
requestedFields = "docid,title_s,abstract_s,submittedDate_s,source_s,authFullName_s,authOrganism_s"
runHalAPIClient :: ClientM (Response Corpus) -> IO (Either ClientError (Response Corpus))
structFields :: Text
structFields = "docid,label_s,parentDocid_i"
runHalAPIClient :: (FromJSON doc, ToHttpApiData doc) =>
ClientM (Response doc) -> IO (Either ClientError (Response doc))
runHalAPIClient cmd = do
manager' <- newManager tlsManagerSettings
runClientM cmd (mkClientEnv manager' $ BaseUrl Https "api.archives-ouvertes.fr" 443 "")
runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Corpus))
runStructureRequest :: Maybe Text -> IO (Either ClientError (Response Struct))
runStructureRequest rq =
runHalAPIClient $ structure (Just requestedFields) rq (Just 10000)
runHalAPIClient $ structure (Just structFields) rq (Just 10000)
runSearchRequest :: [Text] -> IO (Either ClientError (Response Corpus))
runSearchRequest rq =
......
......@@ -18,7 +18,7 @@ data Struct = Struct
{
_struct_docid :: Int,
_struct_label :: Text,
_struct_parent_docid :: [Int]
_struct_parent_docid :: [Text]
} deriving (Show, Generic)
L.makeLenses ''Struct
......
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
module Tree where
......@@ -11,6 +12,7 @@ import Control.Lens.Getter ((^.))
import Data.Map ((!?), (!), insert, empty, Map, fromList, toList)
import qualified Data.Map as M
import Data.Map.Internal (merge, preserveMissing, zipWithMatched)
import Data.Aeson
import Data.List (groupBy, isInfixOf)
import Data.List.Split (chunksOf)
import Data.Either (rights)
......@@ -18,40 +20,40 @@ import Data.Maybe (fromMaybe)
import Data.Scientific (Scientific, floatingOrInteger)
import qualified Data.Vector as V
import Text.Printf
import GHC.Generics
import HAL.Client
import HAL.Doc
{-
formatParentIdRequest :: [Doc] -> Maybe Text
import HAL
import HAL.Doc.Struct
import Data.Text (Text, pack)
formatParentIdRequest :: [Struct] -> Maybe Text
formatParentIdRequest [] = Nothing
formatParentIdRequest (x:[]) = Just . pack . show $ _docid x
formatParentIdRequest (x:[]) = Just . pack . show $ _struct_docid x
formatParentIdRequest (x:xs) =
(Just . pack . show $ _docid x)
(Just . pack . show $ _struct_docid x)
<> (Just " || ")
<> formatParentIdRequest xs
ds2Child :: [Doc] -> IO [Doc]
ds2Child :: [Struct] -> IO [Struct]
ds2Child ds = do
rs <- sequence $ runStructureRequest <$> formatedRequests ds
return . concat $ (^. docs) <$> rights rs
where formatedRequest docs' = Just "parentDocid_i:(" <> formatParentIdRequest docs' <> Just ")"
formatedRequests docs'' = formatedRequest <$> chunksOf 100 docs''
fetchChildren :: [Doc] -> IO [Doc]
fetchChildren :: [Struct] -> IO [Struct]
fetchChildren [] = pure []
fetchChildren ds = (ds <>) <$> (fetchChildren =<< ds2Child ds)
isChildOf :: Doc -> Doc -> Bool
isChildOf (Doc i l) (Doc i' l') = not . null $ V.filter (\id -> id == (scientific2text docId)) ((\(String a) -> a) <$> parentDocIds)
where (Number docId ) = d ! "docid"
(Array parentDocIds) = d' ! "parentDocid_i"
isChildOf :: Struct -> Struct -> Bool
isChildOf (Struct i l p) (Struct i' l' p') = not . null $ filter (\id -> id == (pack $ show i)) p'
data DocTree = DocTree Doc Int [DocTree]
data DocTree = DocTree Struct Int [DocTree]
deriving (Show, Generic)
instance ToJSON DocTree
buildTree :: Int -> [Doc] -> Doc -> DocTree
buildTree :: Int -> [Struct] -> Struct -> DocTree
buildTree depth docs id = DocTree id depth (buildTree (depth + 1) docs <$> children)
where children = filter (isChildOf id) docs
......@@ -62,23 +64,19 @@ formatTree tree@(DocTree doc depth children) =
| depth > 0 = "├" <> (replicate (depth * depth + depth) '─')
| otherwise = "🌲"
findDeepest :: Map Scientific Int -> DocTree -> Map Scientific Int
findDeepest m tree@(DocTree doc depth children) =
findDeepest :: Map Int Int -> DocTree -> Map Int Int
findDeepest m tree@(DocTree doc@(Struct docid _ _) depth children) =
mergeMap map1 maps
where map1 = insert docid' depth m
where map1 = insert docid depth m
maps = foldl mergeMap empty $ findDeepest m <$> children
mergeMap m1 m2 = merge preserveMissing preserveMissing (zipWithMatched whenMatch) m1 m2
whenMatch k v v' = if v > v' then v else v'
(Number docid') = docid doc
isDeep :: Map Scientific Int -> DocTree -> Bool
isDeep m (DocTree i depth _) = depth >= (fromMaybe 0 $ M.lookup id m)
where (Number id) = docid i
isDeep :: Map Int Int -> DocTree -> Bool
isDeep m (DocTree (Struct id _ _) depth _) = depth >= (fromMaybe 0 $ M.lookup id m)
removeDuplicate :: Map Scientific Int -> DocTree -> DocTree
removeDuplicate :: Map Int Int -> DocTree -> DocTree
removeDuplicate deepMap tree@(DocTree doc depth children) = DocTree doc depth (removeDuplicate deepMap <$> filter (isDeep deepMap) children)
noDuplicateTree :: DocTree -> DocTree
noDuplicateTree tree = removeDuplicate (findDeepest M.empty tree) tree
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment