[hyperdata] some fixes to uniqId

parent ea8965c4
......@@ -99,7 +99,7 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ Text.pack title
......@@ -118,13 +118,10 @@ toDoc l (Arxiv.Result { abstract
where
authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing
authors aus' = Just $ (Text.intercalate ", ")
$ map Text.pack
$ map Ax.auName aus'
authors aus' = Just $ Text.intercalate ", "
$ map (Text.pack . Ax.auName) aus'
institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing
institutes aus' = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ map Text.pack
$ map Ax.auFil aus'
institutes aus' = Just $ Text.intercalate ", "
$ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
......@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
......@@ -48,8 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = id
, _hd_uniqIdBdd = id
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Map.lookup lang titles
, _hd_authors = authors_
......@@ -66,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang }
where
authors_ = if authors == []
authors_ = if null authors
then Nothing
else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
......@@ -37,8 +37,8 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = Just id
, _hd_uniqIdBdd = Just id
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = firstPage biblio
, _hd_title = title
, _hd_authors = authors authorships
......@@ -55,25 +55,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language }
where
firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page
firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
where
getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text
source = maybe Nothing getSource primary_location
source = getSource =<< primary_location
where
getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
......@@ -114,7 +114,7 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
......
......@@ -29,7 +29,10 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text)
-- | Unique MD5 hash of the document
, _hd_uniqId :: !(Maybe Text)
-- | Used as unique ID per source (can be same doc in Openalex, HAL, etc)
-- I think it's currently not used.
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text)
......
......@@ -58,14 +58,14 @@ module Gargantext.Database.Query.Table.Node.Document.Insert
where
import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Control.Lens.Cons ( _head )
import Control.Lens.Prism ( _Just )
import Data.Aeson (toJSON, ToJSON)
import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid))
......@@ -93,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
fields = map (QualifiedIdentifier Nothing) inputSqlTypes
class InsertDb a
where
......@@ -207,12 +207,12 @@ instance AddUniqId HyperdataDocument
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
shaParametersDoc :: [HyperdataDocument -> Text]
shaParametersDoc = [ filterText . maybeText . _hd_title
, filterText . maybeText . _hd_abstract
, filterText . maybeText . _hd_source
-- , \d -> maybeText (_hd_publication_date d)
]
......@@ -230,14 +230,14 @@ instance UniqParameters (Node a)
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlphaNum)
filterText = DT.toLower . DT.filter isAlphaNum
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where
newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h)
newHash = "\\x" <> hash (uniqParameters (fromMaybe 0 p) h)
---------------------------------------------------------------------------
......@@ -249,17 +249,17 @@ instance AddUniqId HyperdataContact
addUniqId = addUniqIdsContact
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc
addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
$ set hc_uniqId (Just shaUni) hc
where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact)
-- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
shaParametersContact :: [HyperdataContact -> Text]
shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName )
, maybeText . view (hc_who . _Just . cw_lastName )
, maybeText . view (hc_where . _head . cw_touch . _Just . ct_mail)
]
......@@ -286,7 +286,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node
instance ToNode HyperdataContact where
toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h
toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date
where
date = jour 2020 01 01
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment