Commit 229fdf40 authored by mzheng's avatar mzheng

using structIsChildOf_fs instead of instStructIsChildOf_fs for better grouping

parent d54812d5
......@@ -232,7 +232,7 @@ baseFields = [ "docid"
, "authFullName_s"
, "authOrganism_s"
, "structIdName_fs"
, "instStructIsChildOf_fs" ]
, "structIsChildOf_fs" ]
-- | Comma-separated list of the field names @docid@, @label_s@ and
-- @parentDocid_i@.
-- NOTE(review): presumably the field list sent when querying structures —
-- confirm against the callers.
structFields :: Text
structFields = "docid,label_s,parentDocid_i"
......
......@@ -26,7 +26,7 @@ data Document = Document
, _document_source :: Maybe Text
, _document_authors_names :: [Text]
, _document_authors_affiliations :: [Text]
, _document_institutes_tree :: Map Text Text
, _document_institutes_tree :: Map Text [Text]
, _document_struct_id :: [Int]
, _document_original :: Object
} deriving (Show, Generic)
......@@ -51,9 +51,9 @@ instance FromJSON Document where
let _document_struct_id = map fst structIdname
let _document_authors_affiliations = map snd structIdname
structChild <- o .:? "instStructIsChildOf_fs" :: Parser (Maybe [Text])
structChild <- o .:? "structIsChildOf_fs" :: Parser (Maybe [Text])
let _document_institutes_tree = Map.fromList $ getStructParentChild structChild
let _document_institutes_tree = Map.fromListWith (++) $ getStructParentChild $ removeUnusedData $ fromMaybe [] structChild
abstracts <-
mapM (\lang -> do
......@@ -65,6 +65,16 @@ instance FromJSON Document where
pure $ Document { .. }
-- | Keep every second element of the input (the 2nd, 4th, 6th, ...) and
-- drop the element that precedes each kept one. A trailing element without
-- a partner is dropped too, so an empty or single-element list yields @[]@.
--
-- This is applied to the field `structIsChildOf_fs`, which also carries
-- information that is not used for now: the type of the structure
-- (e.g. laboratory, institute, etc.).
--
-- Example of request:
-- https://api.archives-ouvertes.fr/search/?q=(en_title_t:(glass)%20OR%20en_abstract_t:(glass))%20AND%20(structId_i:6279)%20AND%20(language_s:en)&wt=json&fl=docid,label_s,uri_s,structId_i,title_s,structIsChildOf_fs&rows=80)
removeUnusedData :: [a] -> [a]
removeUnusedData entries = case entries of
  _ : kept : rest -> kept : removeUnusedData rest
  _               -> []
-- | Parses the field structIdName_fs that looks like :
-- > StructId_FacetSep_StructName
--
......@@ -76,8 +86,8 @@ getStructIdsNames (Just idsNames) = map (\tab -> (P.read (T.unpack (P.head tab))
splitInstitutes = P.map (T.splitOn (T.pack "_FacetSep_"))
getStructIdsNames Nothing = []
getStructParentChild :: Maybe [T.Text] -> [(T.Text, T.Text)]
getStructParentChild (Just struct) = map (\tab -> (formatParent $ P.last tab, formatChild $ P.head tab)) $ splitStruct struct
getStructParentChild :: [T.Text] -> [(T.Text, [T.Text])]
getStructParentChild struct = map (\tab -> (formatParent $ P.last tab, [formatChild $ P.head tab] )) $ splitStruct struct
where
splitStruct :: [T.Text] -> [[T.Text]]
splitStruct = P.map (T.splitOn (T.pack "_JoinSep_"))
......@@ -94,7 +104,5 @@ getStructParentChild (Just struct) = map (\tab -> (formatParent $ P.last tab, fo
id = P.head splited
splited = T.splitOn "_FacetSep_" p
getStructParentChild Nothing = []
-- | URL-piece rendering for 'Document': the argument is ignored and the
-- result is always the same comma-separated list of field names.
-- NOTE(review): this list does not contain @structIsChildOf_fs@, which
-- @baseFields@ includes — confirm whether that difference is intentional.
instance ToHttpApiData Document where
toUrlPiece _ = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,structId_i,structIdName_fs"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment