Commit 229fdf40 authored by mzheng's avatar mzheng

using structIsChildOf_fs instead of instStructIsChildOf_fs for better grouping

parent d54812d5
...@@ -232,7 +232,7 @@ baseFields = [ "docid" ...@@ -232,7 +232,7 @@ baseFields = [ "docid"
, "authFullName_s" , "authFullName_s"
, "authOrganism_s" , "authOrganism_s"
, "structIdName_fs" , "structIdName_fs"
, "instStructIsChildOf_fs" ] , "structIsChildOf_fs" ]
structFields :: Text structFields :: Text
structFields = "docid,label_s,parentDocid_i" structFields = "docid,label_s,parentDocid_i"
......
...@@ -26,7 +26,7 @@ data Document = Document ...@@ -26,7 +26,7 @@ data Document = Document
, _document_source :: Maybe Text , _document_source :: Maybe Text
, _document_authors_names :: [Text] , _document_authors_names :: [Text]
, _document_authors_affiliations :: [Text] , _document_authors_affiliations :: [Text]
, _document_institutes_tree :: Map Text Text , _document_institutes_tree :: Map Text [Text]
, _document_struct_id :: [Int] , _document_struct_id :: [Int]
, _document_original :: Object , _document_original :: Object
} deriving (Show, Generic) } deriving (Show, Generic)
...@@ -51,9 +51,9 @@ instance FromJSON Document where ...@@ -51,9 +51,9 @@ instance FromJSON Document where
let _document_struct_id = map fst structIdname let _document_struct_id = map fst structIdname
let _document_authors_affiliations = map snd structIdname let _document_authors_affiliations = map snd structIdname
structChild <- o .:? "instStructIsChildOf_fs" :: Parser (Maybe [Text]) structChild <- o .:? "structIsChildOf_fs" :: Parser (Maybe [Text])
let _document_institutes_tree = Map.fromList $ getStructParentChild structChild let _document_institutes_tree = Map.fromListWith (++) $ getStructParentChild $ removeUnusedData $ fromMaybe [] structChild
abstracts <- abstracts <-
mapM (\lang -> do mapM (\lang -> do
...@@ -65,6 +65,16 @@ instance FromJSON Document where ...@@ -65,6 +65,16 @@ instance FromJSON Document where
pure $ Document { .. } pure $ Document { .. }
-- | Keep every second element of a list: the elements at positions 1, 3, 5, ...
-- (0-based) are returned, the elements at positions 0, 2, 4, ... are dropped.
-- A trailing element that has no partner is dropped as well.
--
-- This is applied to the field @structIsChildOf_fs@, which contains
-- information that is not used for now: the type of the structure
-- (e.g. laboratory, institute, etc.). One entry out of two is therefore removed.
--
-- Example of a request:
-- https://api.archives-ouvertes.fr/search/?q=(en_title_t:(glass)%20OR%20en_abstract_t:(glass))%20AND%20(structId_i:6279)%20AND%20(language_s:en)&wt=json&fl=docid,label_s,uri_s,structId_i,title_s,structIsChildOf_fs&rows=80
removeUnusedData :: [a] -> [a]
removeUnusedData = dropEntry
  where
    -- Skip the current element, then switch to keeping the next one.
    dropEntry [] = []
    dropEntry (_ : rest) = keepEntry rest
    -- Emit the current element, then switch back to skipping.
    keepEntry [] = []
    keepEntry (kept : rest) = kept : dropEntry rest
-- | Parses the field structIdName_fs that looks like : -- | Parses the field structIdName_fs that looks like :
-- > StructId_FacetSep_StructName -- > StructId_FacetSep_StructName
-- --
...@@ -76,8 +86,8 @@ getStructIdsNames (Just idsNames) = map (\tab -> (P.read (T.unpack (P.head tab)) ...@@ -76,8 +86,8 @@ getStructIdsNames (Just idsNames) = map (\tab -> (P.read (T.unpack (P.head tab))
splitInstitutes = P.map (T.splitOn (T.pack "_FacetSep_")) splitInstitutes = P.map (T.splitOn (T.pack "_FacetSep_"))
getStructIdsNames Nothing = [] getStructIdsNames Nothing = []
getStructParentChild :: Maybe [T.Text] -> [(T.Text, T.Text)] getStructParentChild :: [T.Text] -> [(T.Text, [T.Text])]
getStructParentChild (Just struct) = map (\tab -> (formatParent $ P.last tab, formatChild $ P.head tab)) $ splitStruct struct getStructParentChild struct = map (\tab -> (formatParent $ P.last tab, [formatChild $ P.head tab] )) $ splitStruct struct
where where
splitStruct :: [T.Text] -> [[T.Text]] splitStruct :: [T.Text] -> [[T.Text]]
splitStruct = P.map (T.splitOn (T.pack "_JoinSep_")) splitStruct = P.map (T.splitOn (T.pack "_JoinSep_"))
...@@ -94,7 +104,5 @@ getStructParentChild (Just struct) = map (\tab -> (formatParent $ P.last tab, fo ...@@ -94,7 +104,5 @@ getStructParentChild (Just struct) = map (\tab -> (formatParent $ P.last tab, fo
id = P.head splited id = P.head splited
splited = T.splitOn "_FacetSep_" p splited = T.splitOn "_FacetSep_" p
getStructParentChild Nothing = []
instance ToHttpApiData Document where instance ToHttpApiData Document where
toUrlPiece _ = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,structId_i,structIdName_fs" toUrlPiece _ = "docid,title_s,en_abstract_s,fr_abstract_s,submittedDate_s,source_s,authFullName_s,structId_i,structIdName_fs"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment