[hyperdata] some fixes to uniqId

parent ea8965c4
...@@ -99,7 +99,7 @@ toDoc l (Arxiv.Result { abstract ...@@ -99,7 +99,7 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv" ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi , _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url , _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id , _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just $ Text.pack title , _hd_title = Just $ Text.pack title
...@@ -118,13 +118,10 @@ toDoc l (Arxiv.Result { abstract ...@@ -118,13 +118,10 @@ toDoc l (Arxiv.Result { abstract
where where
authors :: [Ax.Author] -> Maybe Text authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing authors [] = Nothing
authors aus' = Just $ (Text.intercalate ", ") authors aus' = Just $ Text.intercalate ", "
$ map Text.pack $ map (Text.pack . Ax.auName) aus'
$ map Ax.auName aus'
institutes :: [Ax.Author] -> Maybe Text institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing institutes [] = Nothing
institutes aus' = Just $ (Text.intercalate ", ") institutes aus' = Just $ Text.intercalate ", "
$ (map (Text.replace ", " " - ")) $ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
$ map Text.pack
$ map Ax.auFil aus'
...@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do ...@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q) eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q) -- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) ) -- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
...@@ -48,8 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) = ...@@ -48,8 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO" HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = id , _hd_uniqId = Nothing
, _hd_uniqIdBdd = id , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Map.lookup lang titles , _hd_title = Map.lookup lang titles
, _hd_authors = authors_ , _hd_authors = authors_
...@@ -66,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) = ...@@ -66,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang } , _hd_language_iso2 = Just $ iso639ToText lang }
where where
authors_ = if authors == [] authors_ = if null authors
then Nothing then Nothing
else Just (T.intercalate ", " authors) else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do -- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit } -- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range) -- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
...@@ -37,8 +37,8 @@ toDoc (OA.Work { .. } ) = ...@@ -37,8 +37,8 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex" HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi , _hd_doi = doi
, _hd_url = url , _hd_url = url
, _hd_uniqId = Just id , _hd_uniqId = Nothing
, _hd_uniqIdBdd = Just id , _hd_uniqIdBdd = Nothing
, _hd_page = firstPage biblio , _hd_page = firstPage biblio
, _hd_title = title , _hd_title = title
, _hd_authors = authors authorships , _hd_authors = authors authorships
...@@ -55,25 +55,25 @@ toDoc (OA.Work { .. } ) = ...@@ -55,25 +55,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language } , _hd_language_iso2 = language }
where where
firstPage :: OA.Biblio -> Maybe Int firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
authors :: [OA.Authorship] -> Maybe Text authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus) authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
where where
getDisplayName :: OA.Authorship -> Maybe Text getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus) institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
where where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text source :: Maybe Text
source = maybe Nothing getSource primary_location source = getSource =<< primary_location
where where
getSource OA.Location { source = s } = getSourceDisplayName <$> s getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
...@@ -114,7 +114,7 @@ toDoc l (PubMedDoc.PubMed { pubmed_id ...@@ -114,7 +114,7 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
) = HyperdataDocument { _hd_bdd = Just "PubMed" ) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id , _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing , _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = t , _hd_title = t
......
...@@ -29,7 +29,10 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ...@@ -29,7 +29,10 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text) data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text) , _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text) , _hd_url :: !(Maybe Text)
-- | Unique MD5 hash of the document
, _hd_uniqId :: !(Maybe Text) , _hd_uniqId :: !(Maybe Text)
-- | Used as unique ID per source (can be same doc in Openalex, HAL, etc)
-- I think it's currently not used.
, _hd_uniqIdBdd :: !(Maybe Text) , _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int) , _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text) , _hd_title :: !(Maybe Text)
......
...@@ -58,14 +58,14 @@ module Gargantext.Database.Query.Table.Node.Document.Insert ...@@ -58,14 +58,14 @@ module Gargantext.Database.Query.Table.Node.Document.Insert
where where
import Control.Lens (set, view) import Control.Lens (set, view)
import Control.Lens.Cons import Control.Lens.Cons ( _head )
import Control.Lens.Prism import Control.Lens.Prism ( _Just )
import Data.Aeson (toJSON, ToJSON) import Data.Aeson (toJSON, ToJSON)
import Data.Text qualified as DT (pack, concat, take, filter, toLower) import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..)) import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field) import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-}) import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid)) import Gargantext.Core (HasDBid(toDBid))
...@@ -93,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery) ...@@ -93,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId] insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p) insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes fields = map (QualifiedIdentifier Nothing) inputSqlTypes
class InsertDb a class InsertDb a
where where
...@@ -207,12 +207,12 @@ instance AddUniqId HyperdataDocument ...@@ -207,12 +207,12 @@ instance AddUniqId HyperdataDocument
$ set hd_uniqId (Just shaUni) doc $ set hd_uniqId (Just shaUni) doc
where where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc) shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)] shaParametersDoc :: [HyperdataDocument -> Text]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d) shaParametersDoc = [ filterText . maybeText . _hd_title
, \d -> filterText $ maybeText (_hd_abstract d) , filterText . maybeText . _hd_abstract
, \d -> filterText $ maybeText (_hd_source d) , filterText . maybeText . _hd_source
-- , \d -> maybeText (_hd_publication_date d) -- , \d -> maybeText (_hd_publication_date d)
] ]
...@@ -230,14 +230,14 @@ instance UniqParameters (Node a) ...@@ -230,14 +230,14 @@ instance UniqParameters (Node a)
filterText :: Text -> Text filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlphaNum) filterText = DT.toLower . DT.filter isAlphaNum
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a) instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where where
newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h) newHash = "\\x" <> hash (uniqParameters (fromMaybe 0 p) h)
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
...@@ -249,17 +249,17 @@ instance AddUniqId HyperdataContact ...@@ -249,17 +249,17 @@ instance AddUniqId HyperdataContact
addUniqId = addUniqIdsContact addUniqId = addUniqIdsContact
addUniqIdsContact :: HyperdataContact -> HyperdataContact addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd) addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc $ set hc_uniqId (Just shaUni) hc
where where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact) shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact)
-- | TODO add more shaparameters -- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)] shaParametersContact :: [HyperdataContact -> Text]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName )
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d , maybeText . view (hc_who . _Just . cw_lastName )
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d , maybeText . view (hc_where . _head . cw_touch . _Just . ct_mail)
] ]
...@@ -286,7 +286,7 @@ instance ToNode HyperdataDocument where ...@@ -286,7 +286,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node -- TODO better Node
instance ToNode HyperdataContact where instance ToNode HyperdataContact where
toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date
where where
date = jour 2020 01 01 date = jour 2020 01 01
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment