Verified Commit 5a22a9f2 authored by Przemyslaw Kaminski

Merge branch 'dev' into 313-dev-json-csv-import-of-older-ngrams

parents a13df89f 845db556
Pipeline #5731 failed with stages in 22 minutes and 14 seconds
+## Version 0.0.6.9.9.9.6.5.1 [RELEASE CANDIDATE 007]
+* [FRONT][FIX][Matching Documents are not displayed anymore in graph (#636)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/636)
 ## Version 0.0.6.9.9.9.6.5 [RELEASE CANDIDATE 007]
 * [BACK][WIP][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
...
@@ -5,7 +5,7 @@ cabal-version: 3.4
 -- see: https://github.com/sol/hpack
 name: gargantext
-version: 0.0.6.9.9.9.6.5
+version: 0.0.6.9.9.9.6.5.1
 synopsis: Search, map, share
 description: Please see README.md
 category: Data
...
@@ -25,10 +25,10 @@ import Data.Morpheus.Types
 import Data.Text (pack)
 import Data.Time.Format.ISO8601 (iso8601Show)
 import Gargantext.API.Admin.Types (HasSettings)
-import Gargantext.API.Errors.Types ( BackendInternalError )
+import Gargantext.API.Errors.Types
 import Gargantext.API.Prelude (GargM)
 import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
 import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..))
 import Gargantext.Database.Prelude (CmdCommon)
 import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS)
@@ -71,6 +71,8 @@ data HyperdataRowDocumentGQL =
       , hrd_source :: Text
      , hrd_title :: Text
      , hrd_url :: Text
+     , hrd_uniqId :: Text
+     , hrd_uniqIdBdd :: Text
      } deriving (Generic, GQLType, Show)
 data NodeContextGQL = NodeContextGQL
@@ -214,6 +216,8 @@ toHyperdataRowDocumentGQL hyperdata =
       , hrd_source = _hr_source
      , hrd_title = _hr_title
      , hrd_url = _hr_url
+     , hrd_uniqId = _hr_uniqId
+     , hrd_uniqIdBdd = _hr_uniqIdBdd
      }
     HyperdataRowContact { } -> Nothing
...
@@ -22,21 +22,21 @@ import Data.Map.Strict qualified as Map
 import Data.Set qualified as Set
 import Data.Text (pack)
 import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
-import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
+import Gargantext.API.Ngrams.Types
-import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
+import Gargantext.API.Node.Corpus.Export.Types
 import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
 import Gargantext.API.Prelude (GargNoServer)
-import Gargantext.Core.NodeStory.Types ( NodeListStory )
+import Gargantext.Core.NodeStory
 import Gargantext.Core.Types
 import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
 import Gargantext.Database.Admin.Config (userMaster)
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Database.Prelude (Cmd)
-import Gargantext.Database.Query.Table.Node ( defaultList )
+import Gargantext.Database.Query.Table.Node
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
 import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
-import Gargantext.Database.Schema.Context (_context_id)
+import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
 import Gargantext.Prelude hiding (hash)
 import Gargantext.Prelude.Crypto.Hash (hash)
@@ -51,7 +51,9 @@ getCorpus :: CorpusId
 getCorpus cId lId nt' = do
   let
-    nt = fromMaybe NgramsTerms nt'
+    nt = case nt' of
+      Nothing -> NgramsTerms
+      Just t  -> t
   listId <- case lId of
     Nothing -> defaultList cId
@@ -73,10 +75,10 @@ getCorpus cId lId nt' = do
         ) ns (Map.map (Set.map unNgramsTerm) ngs)
   where
     d_hash :: Context HyperdataDocument -> Set Text -> Text
-    d_hash _a b = hash [ -- fromMaybe "" (_hd_uniqId $ _context_hyperdata a),
-                         hash b
+    d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
+                      , hash b
                       ]
-  pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".json")
+  pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
     $ Corpus { _c_corpus = Map.elems r
              , _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
...
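Note on the d_hash change above: the per-document export hash now folds in the document's _hd_uniqId (when present) together with the hash of its ngrams, instead of hashing the ngrams alone. A minimal, self-contained sketch of that combination, with SHA-256 from cryptonite standing in for Gargantext.Prelude.Crypto.Hash.hash (an assumption for illustration, not taken from this diff):

-- Illustrative sketch only: combine a document's uniqId with the hash of
-- its ngrams set, mirroring the shape of the new d_hash above.
import Crypto.Hash (SHA256 (..), hashWith)
import Data.ByteArray.Encoding (Base (Base16), convertToBase)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE

sha256 :: Text -> Text
sha256 = TE.decodeUtf8 . convertToBase Base16 . hashWith SHA256 . TE.encodeUtf8

-- The uniqId (empty when absent) is hashed together with the hash of the terms.
docHash :: Maybe Text -> Set Text -> Text
docHash mUniqId terms =
  sha256 (fromMaybe "" mUniqId <> sha256 (T.unwords (Set.toAscList terms)))

With this scheme two documents that share the same ngrams but carry different uniqIds hash differently, which is the practical effect of uncommenting the _hd_uniqId term in d_hash.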
@@ -24,7 +24,7 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
 import Data.Tuple.Select (sel1, sel2, sel3)
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
-import Gargantext.Core.NodeStory.Types ( HasNodeStory )
+import Gargantext.Core.NodeStory (HasNodeStory)
 import Gargantext.Core.Text.Corpus.API qualified as API
 import Gargantext.Core.Text.List (buildNgramsLists)
 import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
@@ -40,7 +40,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
 import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
 import Gargantext.Database.Prelude (hasConfig)
-import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNotExists)
+import Gargantext.Database.Query.Table.Node (getOrMkList)
+import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
 import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
 import Gargantext.Database.Query.Tree.Error (HasTreeError)
 import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
@@ -53,7 +54,7 @@ import Prelude qualified
 langToSearx :: Lang -> Text
 langToSearx All = "en-US"
-langToSearx x = Text.toLower acronym <> "-" <> acronym
+langToSearx x = (Text.toLower acronym) <> "-" <> acronym
   where
     acronym = show x
@@ -136,7 +137,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
   -- docs :: [Either Text HyperdataDocument]
   let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
   --printDebug "[triggerSearxSearch] docs" docs
-  let docs' = mapMaybe rightToMaybe docs
+  let docs' = catMaybes $ rightToMaybe <$> docs
   {-
   Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
     printDebug "[triggerSearxSearch] doc time" $
@@ -214,14 +215,16 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
   Right HyperdataDocument { _hd_bdd = Just "Searx"
          , _hd_doi = Nothing
         , _hd_url = Nothing
+        , _hd_uniqId = Nothing
+        , _hd_uniqIdBdd = Nothing
        , _hd_page = Nothing
        , _hd_title = Just _sr_title
        , _hd_authors = Nothing
        , _hd_institutes = Nothing
        , _hd_source = Just _sr_engine
        , _hd_abstract = _sr_content
-       , _hd_publication_date = T.pack Prelude.. formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" Prelude.<$> mDate
+       , _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate
-       , _hd_publication_year = fromIntegral Prelude.. sel1 Prelude.<$> mGregorian
+       , _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
        , _hd_publication_month = sel2 <$> mGregorian
        , _hd_publication_day = sel3 <$> mGregorian
        , _hd_publication_hour = Nothing
...
@@ -22,22 +22,22 @@ import Data.Swagger (ToSchema)
 import Data.Text qualified as T
 import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
-import Gargantext.API.Errors.Types ( BackendInternalError )
+import Gargantext.API.Errors.Types
-import Gargantext.API.Prelude ( GargM )
+import Gargantext.API.Prelude
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.NLP (nlpServerGet)
 import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
 import Gargantext.Core.Text.Terms (TermType(..))
 import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
 import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
-import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
+import Gargantext.Database.Action.Flow.Types
-import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
+import Gargantext.Database.Admin.Types.Hyperdata.Corpus
 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
-import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus) )
+import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
 import Gargantext.Prelude
 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
-import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
+import Servant
 data DocumentUpload = DocumentUpload
@@ -108,6 +108,8 @@ documentUpload nId doc = do
   let hd = HyperdataDocument { _hd_bdd = Nothing
            , _hd_doi = Nothing
           , _hd_url = Nothing
+          , _hd_uniqId = Nothing
+          , _hd_uniqIdBdd = Nothing
          , _hd_page = Nothing
          , _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc
          , _hd_authors = Just $ view du_authors doc
...
@@ -10,22 +10,23 @@ Portability : POSIX
 -}
 {-# LANGUAGE MonoLocalBinds #-}
+{-# LANGUAGE TemplateHaskell #-}
 {-# LANGUAGE TypeOperators #-}
 module Gargantext.API.Node.DocumentsFromWriteNodes
   where
-import Conduit ( yieldMany )
+import Conduit
 import Control.Lens ((^.))
-import Data.Aeson ( genericParseJSON, defaultOptions, genericToJSON, FromJSON(parseJSON), ToJSON(toJSON) )
+import Data.Aeson
 import Data.List qualified as List
-import Data.Swagger ( ToSchema )
+import Data.Swagger
 import Data.Text qualified as T
-import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
+import Gargantext.API.Admin.Auth.Types
 import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
 import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
 import Gargantext.API.Admin.Types (HasSettings)
-import Gargantext.API.Errors.Types ( BackendInternalError )
+import Gargantext.API.Errors.Types
 import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
 import Gargantext.API.Prelude (GargM)
 import Gargantext.Core (Lang(..))
@@ -38,13 +39,13 @@ import Gargantext.Core.Types.Individu (User(..))
 import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
-import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
+import Gargantext.Database.Admin.Types.Hyperdata.Frame
-import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
+import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
 import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
 import Gargantext.Prelude
 import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
-import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
+import Servant
 ------------------------------------------------------------------------
 type API = Summary " Documents from Write nodes."
@@ -105,7 +106,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
       pure (node, contents)
     ) frameWrites
   let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
-  let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
+  let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
  let parsedE = (\(node, contents)
                  -> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
  let parsed = List.concat $ rights parsedE
@@ -158,6 +159,8 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
       Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes
              , _hd_doi = Nothing
             , _hd_url = Nothing
+            , _hd_uniqId = Nothing
+            , _hd_uniqIdBdd = Nothing
            , _hd_page = Nothing
           , _hd_title = Just t
          , _hd_authors = Just authors'
...
@@ -16,13 +16,13 @@ Here is writtent a common interface.
 module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
   where
-import Codec.Serialise ( Serialise, deserialise )
+import Codec.Serialise
 import Data.ByteString.Lazy qualified as BL
-import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
+import Data.Csv
 import Data.Text qualified as T
 import Data.Vector (Vector)
 import Data.Vector qualified as Vector
-import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) )
+import Gargantext.Core.Text.Corpus.Parsers.CSV
 import Gargantext.Database.Admin.Types.Hyperdata.Contact
 import Gargantext.Prelude
 import System.FilePath.Posix (takeExtension)
@@ -156,9 +156,11 @@ imtUser2gargContact (IMTUser { id
         , _hc_where = [ou]
        , _hc_title = title
        , _hc_source = entite
-       , _hc_lastValidation = date_modification }
+       , _hc_lastValidation = date_modification
+       , _hc_uniqIdBdd = Nothing
+       , _hc_uniqId = Nothing }
   where
-    title = (<>) <$> fmap (\p -> p <> " ") prenom <*> nom
+    title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
    qui = ContactWho { _cw_id = id
                     , _cw_firstName = prenom
                     , _cw_lastName = nom
@@ -180,7 +182,7 @@ imtUser2gargContact (IMTUser { id
     -- meta = ContactMetaData (Just "IMT annuaire") date_modification'
     toList' Nothing  = []
     toList' (Just x) = [x]
@@ -15,6 +15,7 @@ Portability : POSIX
 module Gargantext.Core.Flow.Types where
 import Control.Lens
+import Gargantext.Database.Admin.Types.Hyperdata
 import Gargantext.Database.Admin.Types.Node
 import Gargantext.Database.Schema.Node (node_hash_id)
 import Gargantext.Prelude
@@ -24,6 +25,14 @@ class UniqId a
   where
     uniqId :: Lens' a (Maybe Hash)
+instance UniqId HyperdataDocument
+  where
+    uniqId = hd_uniqId
+instance UniqId HyperdataContact
+  where
+    uniqId = hc_uniqId
 instance UniqId (Node a)
   where
     uniqId = node_hash_id
...
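The hunk above re-exposes UniqId instances backed by the hd_uniqId and hc_uniqId lenses. As a rough, self-contained illustration of how such a lens-backed class is typically consumed, here is a sketch where every name (Doc, docUniqId, ensureUniqId) is invented for the example and not taken from Gargantext:

{-# LANGUAGE TemplateHaskell #-}
-- Illustrative sketch only: a UniqId-style class lets generic code read or
-- stamp a deduplication hash without knowing the concrete record type.
import Control.Lens (Lens', makeLenses, set, view)
import Data.Text (Text)

type Hash = Text

class UniqId a where
  uniqId :: Lens' a (Maybe Hash)

data Doc = Doc { _docTitle :: Maybe Text, _docUniqId :: Maybe Hash }
  deriving Show
makeLenses ''Doc

instance UniqId Doc where
  uniqId = docUniqId

-- Stamp a hash only when the record does not carry one yet.
ensureUniqId :: UniqId a => Hash -> a -> a
ensureUniqId h x =
  case view uniqId x of
    Just _  -> x
    Nothing -> set uniqId (Just h) x

Under that reading, code such as the flow layer can call view uniqId / set uniqId uniformly on documents, contacts, and nodes, which is what the re-added instances make possible again.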
@@ -19,13 +19,13 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
   ) where
 import Arxiv qualified as Arxiv
-import Conduit ( ConduitT, (.|), mapC, takeC )
+import Conduit
 import Data.Text (unpack)
 import Data.Text qualified as Text
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Text.Corpus.Query as Corpus
 import Gargantext.Core.Types (Term(..))
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Prelude hiding (get)
 import Network.Api.Arxiv qualified as Ax
@@ -46,7 +46,7 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
     transformAST ast = case ast of
       BAnd sub (BConst (Negative term))
         -- The second term become positive, so that it can be translated.
-        -> Ax.AndNot <$> transformAST sub <*> transformAST (BConst (Positive term))
+        -> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
       BAnd term1 (BNot term2)
         -> Ax.AndNot <$> transformAST term1 <*> transformAST term2
       BAnd sub1 sub2
@@ -88,7 +88,7 @@ toDoc l (Arxiv.Result { abstract
       , authors = aus
      --, categories
      , doi
-     -- , id
+     , id
     , journal
     --, primaryCategory
     , publication_date
@@ -99,6 +99,8 @@ toDoc l (Arxiv.Result { abstract
   ) = HyperdataDocument { _hd_bdd = Just "Arxiv"
         , _hd_doi = Just $ Text.pack doi
        , _hd_url = Just $ Text.pack url
+       , _hd_uniqId = Just $ Text.pack id
+       , _hd_uniqIdBdd = Nothing
      , _hd_page = Nothing
      , _hd_title = Just $ Text.pack title
      , _hd_authors = authors aus
@@ -116,10 +118,13 @@ toDoc l (Arxiv.Result { abstract
   where
     authors :: [Ax.Author] -> Maybe Text
     authors [] = Nothing
-    authors aus' = Just $ Text.intercalate ", "
-                 $ map (Text.pack . Ax.auName) aus'
+    authors aus' = Just $ (Text.intercalate ", ")
+                 $ map Text.pack
+                 $ map Ax.auName aus'
     institutes :: [Ax.Author] -> Maybe Text
     institutes [] = Nothing
-    institutes aus' = Just $ Text.intercalate ", "
-                    $ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
+    institutes aus' = Just $ (Text.intercalate ", ")
+                    $ (map (Text.replace ", " " - "))
+                    $ map Text.pack
+                    $ map Ax.auFil aus'
@@ -9,7 +9,7 @@ Portability : POSIX
 -}
 module Gargantext.Core.Text.Corpus.API.EPO where
-import Conduit ( ConduitT, (.|), mapC )
+import Conduit
 import Data.LanguageCodes (ISO639_1)
 import Data.Map.Strict qualified as Map
 import Data.Text qualified as T
@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO
 import EPO.API.Client.Implementation qualified as EPO
 import Gargantext.Core (iso639ToText)
 import Gargantext.Core.Text.Corpus.Query qualified as Corpus
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Network.URI (parseURI)
 import Protolude
 import Servant.Client.Core (ClientError(ConnectionError))
@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
     Just apiUrl -> do
       eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
       pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
       -- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
       -- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
@@ -48,6 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
   HyperdataDocument { _hd_bdd = Just "EPO"
       , _hd_doi = Nothing
      , _hd_url = Nothing
+     , _hd_uniqId = id
+     , _hd_uniqIdBdd = id
     , _hd_page = Nothing
     , _hd_title = Map.lookup lang titles
     , _hd_authors = authors_
@@ -64,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
     , _hd_language_iso2 = Just $ iso639ToText lang }
   where
-    authors_ = if null authors
+    authors_ = if authors == []
                  then Nothing
                  else Just (T.intercalate ", " authors)
 -- EPO.withAuthKey authKey $ \token -> do
 --   let range = EPO.Range { rBegin = 1, rEnd = limit }
 --   (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
...
@@ -12,12 +12,14 @@ Portability : POSIX
 module Gargantext.Core.Text.Corpus.API.Hal
   where
-import Conduit ( ConduitT, (.|), mapMC )
+import Conduit
+import Data.Either
 import Data.LanguageCodes qualified as ISO639
 import Data.Map.Strict qualified as Map
-import Data.Text (pack)
+import Data.Maybe
+import Data.Text (pack, intercalate)
 import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Defaults qualified as Defaults
 import Gargantext.Prelude hiding (intercalate)
 import HAL qualified as HAL
@@ -28,7 +30,7 @@ import Servant.Client (ClientError)
 get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
 get la q ml = do
   eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
-  either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs
+  either (panicTrace . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
 getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
 getC la q ml = do
@@ -43,15 +45,17 @@ toDoc' la (HAL.Corpus { .. }) = do
   -- printDebug "[toDoc corpus] h" h
   let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
   let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
-  let abstractDefault = unwords _corpus_abstract
+  let abstractDefault = intercalate " " _corpus_abstract
   let abstract = case la of
         Nothing -> abstractDefault
-        Just l -> maybe abstractDefault unwords (Map.lookup l _corpus_abstract_lang_map)
+        Just l -> fromMaybe abstractDefault (intercalate " " <$> Map.lookup l _corpus_abstract_lang_map)
   pure HyperdataDocument { _hd_bdd = Just "Hal"
         , _hd_doi = Just $ pack $ show _corpus_docid
        , _hd_url = Nothing
+       , _hd_uniqId = Nothing
+       , _hd_uniqIdBdd = Nothing
      , _hd_page = Nothing
-     , _hd_title = Just $ unwords _corpus_title
+     , _hd_title = Just $ intercalate " " _corpus_title
     , _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
     , _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
     , _hd_source = Just $ maybe "Nothing" identity _corpus_source
...
@@ -18,12 +18,12 @@ import Gargantext.Core (Lang(..))
 import Gargantext.Core.Text.Corpus.Parsers (cleanText)
 import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
 import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Defaults qualified as Defaults
 import Gargantext.Prelude hiding (get)
 import Isidore qualified as Isidore
 import Isidore.Client
-import Servant.Client ( ClientError(DecodeFailure) )
+import Servant.Client
 -- | TODO work with the ServantErr
 get :: Lang -> Maybe Isidore.Limit
@@ -40,7 +40,7 @@ get la l q a = do
   iDocs <- either printErr _content <$> Isidore.get l q a
-  hDocs <- mapM (isidoreToDoc la) (toIsidoreDocs iDocs)
+  hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
   pure hDocs
 isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
@@ -54,7 +54,7 @@ isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
 isidoreToDoc l (IsidoreDoc t a d u s as) = do
   let
     author :: Author -> Text
-    author (Author fn ln) = _name fn <> ", " <> _name ln
+    author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
     author (Authors aus) = Text.intercalate ". " $ map author aus
     creator2text :: Creator -> Text
@@ -66,19 +66,21 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
     langText (OnlyText t2 ) = t2
     langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
-  let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
+  let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
   let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
   pure HyperdataDocument
     { _hd_bdd = Just "Isidore"
     , _hd_doi = Nothing
    , _hd_url = u
+   , _hd_uniqId = Nothing
+   , _hd_uniqIdBdd = Nothing
   , _hd_page = Nothing
   , _hd_title = Just $ cleanText $ langText t
   , _hd_authors = creator2text <$> as
   , _hd_institutes = Nothing
-  , _hd_source = Just $ maybe "Nothing" (identity . _sourceName) s
+  , _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
-  , _hd_abstract = cleanText . langText <$> a
+  , _hd_abstract = cleanText <$> langText <$> a
  , _hd_publication_date = fmap (Text.pack . show) utcTime
  , _hd_publication_year = pub_year
  , _hd_publication_month = pub_month
...
@@ -9,15 +9,15 @@ Portability : POSIX
 -}
 module Gargantext.Core.Text.Corpus.API.OpenAlex where
-import Conduit ( ConduitT, (.|), mapC, takeC )
+import Conduit
 import Data.LanguageCodes qualified as ISO639
-import Data.Text qualified as T
+import qualified Data.Text as T
 import Gargantext.Core (iso639ToText)
 import Gargantext.Core.Text.Corpus.Query as Corpus
-import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Protolude
-import OpenAlex qualified as OA
+import qualified OpenAlex as OA
-import OpenAlex.Types qualified as OA
+import qualified OpenAlex.Types as OA
 import Servant.Client (ClientError)
@@ -37,6 +37,8 @@ toDoc (OA.Work { .. } ) =
   HyperdataDocument { _hd_bdd = Just "OpenAlex"
       , _hd_doi = doi
     , _hd_url = url
+   , _hd_uniqId = Just id
+   , _hd_uniqIdBdd = Just id
   , _hd_page = firstPage biblio
  , _hd_title = title
  , _hd_authors = authors authorships
@@ -53,25 +55,25 @@ toDoc (OA.Work { .. } ) =
  , _hd_language_iso2 = language }
   where
     firstPage :: OA.Biblio -> Maybe Int
-    firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
+    firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page
     authors :: [OA.Authorship] -> Maybe Text
     authors [] = Nothing
-    authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
+    authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
       where
        getDisplayName :: OA.Authorship -> Maybe Text
       getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
    institutes :: [OA.Authorship] -> Maybe Text
    institutes [] = Nothing
-    institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
+    institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
      where
       getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
      getDisplayName :: OA.DehydratedInstitution -> Text
     getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
    source :: Maybe Text
-    source = getSource =<< primary_location
+    source = maybe Nothing getSource primary_location
      where
       getSource OA.Location { source = s } = getSourceDisplayName <$> s
      getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
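A side note on the pattern that recurs in the OpenAlex hunks above and the PubMed ones below: the two spellings being swapped are interchangeable, since for Maybe, maybe Nothing f m is the same as f =<< m, and for lists, catMaybes (f <$> xs) is the same as mapMaybe f xs. A small self-contained check, with purely hypothetical values that are not taken from the diff:

-- Illustrative equivalences only.
import Data.Maybe (catMaybes, mapMaybe)
import Text.Read (readMaybe)

firstPageBind, firstPageMaybe :: Maybe String -> Maybe Int
firstPageBind  mp = readMaybe =<< mp
firstPageMaybe mp = maybe Nothing readMaybe mp

pick, pick' :: [Maybe String] -> [String]
pick  = mapMaybe id
pick' xs = catMaybes (id <$> xs)

main :: IO ()
main = do
  print (firstPageBind (Just "12") == firstPageMaybe (Just "12")) -- True
  print (pick [Just "a", Nothing] == pick' [Just "a", Nothing])   -- True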
@@ -20,13 +20,13 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
   )
   where
-import Conduit ( ConduitT, (.|), mapC, takeC )
+import Conduit
 import Data.Text qualified as Text
 import Data.Text.Encoding qualified as TE
 import Gargantext.Core (Lang(..))
 import Gargantext.Core.Text.Corpus.Query as Corpus
 import Gargantext.Core.Types (Term(..))
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Prelude hiding (get)
 import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
 import PUBMED qualified as PubMed
@@ -64,7 +64,7 @@ convertQuery q = ESearch (interpretQuery q transformAST)
     transformAST ast = case ast of
       BAnd sub (BConst (Negative term))
         -- The second term become positive, so that it can be translated.
-        -> transformAST sub <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
+        -> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
       BAnd term1 (BNot term2)
         -> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
       BAnd sub1 sub2
@@ -108,11 +108,14 @@ get apiKey q l = do
   --  <$> PubMed.getMetadataWithC q l
 toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
-toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
+toDoc l (PubMedDoc.PubMed { pubmed_id
+                          , pubmed_article = PubMedDoc.PubMedArticle t j as aus
                           , pubmed_date = PubMedDoc.PubMedDate a y m d }
   ) = HyperdataDocument { _hd_bdd = Just "PubMed"
        , _hd_doi = Nothing
       , _hd_url = Nothing
+      , _hd_uniqId = Just $ Text.pack $ show pubmed_id
+      , _hd_uniqIdBdd = Nothing
     , _hd_page = Nothing
     , _hd_title = t
     , _hd_authors = authors aus
@@ -130,14 +133,16 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
   where
     authors :: [PubMedDoc.Author] -> Maybe Text
     authors [] = Nothing
-    authors au = Just $ Text.intercalate ", "
-               $ mapMaybe (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
+    authors au = Just $ (Text.intercalate ", ")
+               $ catMaybes
+               $ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
     institutes :: [PubMedDoc.Author] -> Maybe Text
     institutes [] = Nothing
-    institutes au = Just $ Text.intercalate ", "
-                  $ map (Text.replace ", " " - ")
-                  $ mapMaybe PubMedDoc.affiliation au
+    institutes au = Just $ (Text.intercalate ", ")
+                  $ (map (Text.replace ", " " - "))
+                  $ catMaybes
+                  $ map PubMedDoc.affiliation au
     abstract :: [Text] -> Maybe Text
...
@@ -46,7 +46,7 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
 import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
 import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
 import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
 import Gargantext.Prelude hiding (show, undefined)
 import Gargantext.Utils.Zip qualified as UZip
@@ -88,10 +88,10 @@ parseFormatC :: MonadBaseControl IO m
              -> m (Either Text (Integer, ConduitT () HyperdataDocument IO ()))
 parseFormatC CsvGargV3 Plain bs = do
   let eParsedC = parseCsvC $ DBL.fromStrict bs
-  pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
+  pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
 parseFormatC CsvHal Plain bs = do
   let eParsedC = parseCsvC $ DBL.fromStrict bs
-  pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
+  pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
 parseFormatC Istex Plain bs = do
   ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
   pure $ (\p -> (1, yieldMany [p])) <$> ep
@@ -120,15 +120,15 @@ parseFormatC Iramuteq Plain bs = do
          , yieldMany docs
            .| mapC (map $ first Iramuteq.keys)
           .| mapC (map $ both decodeUtf8)
-          .| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
+          .| mapMC ((toDoc Iramuteq) . (map (second (DT.replace "_" " "))))
         )
        )
     <$> eDocs
 parseFormatC JSON Plain bs = do
   let eParsedC = parseJSONC $ DBL.fromStrict bs
-  pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
+  pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
 parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
-  fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
+  fileNames <- filter (filterZIPFileNameP ft) <$> DM.keys <$> getEntries
   printDebug "[parseFormatC] fileNames" fileNames
   fileContents <- mapM getEntry fileNames
   --printDebug "[parseFormatC] fileContents" fileContents
@@ -145,19 +145,19 @@ parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
       let contents' = snd <$> contents
       let totalLength = sum lenghts
       pure $ Right ( totalLength
-                   , void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[parseFormatC] doc")
+                   , sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc")
     _ -> pure $ Left $ DT.intercalate "\n" errs
 parseFormatC _ _ _ = pure $ Left "Not implemented"
 filterZIPFileNameP :: FileType -> EntrySelector -> Bool
 filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
-                             (unEntrySelector f /= "manifest.json")
+                             ((unEntrySelector f) /= "manifest.json")
 filterZIPFileNameP _ _ = True
 etale :: [HyperdataDocument] -> [HyperdataDocument]
-etale = concatMap etale'
+etale = concat . (map etale')
   where
     etale' :: HyperdataDocument -> [HyperdataDocument]
     etale' h = map (\t -> h { _hd_abstract = Just t })
@@ -232,6 +232,8 @@ toDoc ff d = do
       let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
             , _hd_doi = lookup "doi" d
           , _hd_url = lookup "URL" d
+         , _hd_uniqId = Nothing
+         , _hd_uniqIdBdd = Nothing
        , _hd_page = Nothing
       , _hd_title = lookup "title" d
     , _hd_authors = lookup "authors" d
@@ -291,7 +293,7 @@ runParser format text = pure $ runParser' format text
 runParser' :: FileType
            -> DB.ByteString
-           -> Either Text [[(DB.ByteString, DB.ByteString)]]
+           -> (Either Text [[(DB.ByteString, DB.ByteString)]])
 runParser' format text = first DT.pack $ parseOnly (withParser format) text
 openZip :: FilePath -> IO [DB.ByteString]
@@ -315,5 +317,5 @@ clean txt = DBC.map clean' txt
 --
 splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
-splitOn Authors (Just "WOS") = DT.splitOn "; "
-splitOn _ _ = DT.splitOn ", "
+splitOn Authors (Just "WOS") = (DT.splitOn "; ")
+splitOn _ _ = (DT.splitOn ", ")
@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO ()
 book2csv n f_in f_out = do
   files <- filesOf f_in
   texts <- readPublis f_in files
-  let publis = concatMap (file2publi n) texts
-  let docs = zipWith publiToHyperdata [1..] publis
+  let publis = List.concat $ map (file2publi n) texts
+  let docs = map (\(y,p) -> publiToHyperdata y p) $ List.zip [1..] publis
   DBL.writeFile f_out (hyperdataDocument2csv docs)
 filesOf :: FileDir -> IO [FilePath]
@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam
   <$> getDirectoryContents fd
 readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)]
-readPublis fd = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt))
+readPublis fd fps = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt)) fps
 ------------------------------------------------------------------------
 -- Main Types
@@ -63,7 +63,7 @@ type FileDir = FilePath
 ---------------------------------------------------------------------
 file2publi :: Int -> (FilePath, Text) -> [Publi]
-file2publi n (fp,theText) = map (uncurry (Publi authors source)) theTexts
+file2publi n (fp,theText) = map (\(t,txt) -> Publi authors source t txt) theTexts
   where
     theTexts = text2titleParagraphs n theText
     FileInfo authors source = fileNameInfo fp
@@ -81,6 +81,8 @@ publiToHyperdata y (Publi a s t txt) =
   HyperdataDocument { _hd_bdd = Just "Book File"
       , _hd_doi = Nothing
     , _hd_url = Nothing
+   , _hd_uniqId = Nothing
+   , _hd_uniqIdBdd = Nothing
  , _hd_page = Nothing
  , _hd_title = Just t
  , _hd_authors = Just (DT.concat a)
...
@@ -14,7 +14,8 @@ CSV parser for Gargantext corpus files.
 module Gargantext.Core.Text.Corpus.Parsers.CSV where
-import Conduit ( ConduitT, (.|), yieldMany, mapC )
+import Conduit
+import Control.Applicative
 import Data.ByteString qualified as BS
 import Data.ByteString.Lazy qualified as BL
 import Data.Csv
@@ -23,9 +24,9 @@ import Data.Text qualified as T
 import Data.Time.Segment (jour)
 import Data.Vector (Vector)
 import Data.Vector qualified as V
-import Gargantext.Core.Text ( sentences, unsentences )
+import Gargantext.Core.Text
-import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
+import Gargantext.Core.Text.Context
-import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
+import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
 import Gargantext.Prelude hiding (length, show)
 import Protolude
@@ -59,6 +60,8 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
   HyperdataDocument { _hd_bdd = Just "CSV"
       , _hd_doi = Just . pack . show $ did
     , _hd_url = Nothing
+   , _hd_uniqId = Nothing
+   , _hd_uniqIdBdd = Nothing
  , _hd_page = Nothing
  , _hd_title = Just dt
  , _hd_authors = Nothing
@@ -90,11 +93,11 @@ toDocs v = V.toList
   (V.enumFromN 1 (V.length v'')) v''
   where
     v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
-    seps= V.fromList [Paragraphs 1, Sentences 3, Chars 3]
+    seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
 ---------------------------------------------------------------
 fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
-fromDocs = V.map fromDocs'
+fromDocs docs = V.map fromDocs' docs
   where
     fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
                                           , csv_source = d_source
@@ -108,11 +111,16 @@ fromDocs = V.map fromDocs'
 -- | Split a document in its context
 -- TODO adapt the size of the paragraph according to the corpus average
 splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
-splitDoc m splt doc =
-  let docSize = (T.length $ csv_abstract doc) in
-  if (docSize > 1000) && (mod (round m) docSize >= 10)
-    then splitDoc' splt doc
-    else V.fromList [doc]
+splitDoc m splt doc = let docSize = (T.length $ csv_abstract doc) in
+  if docSize > 1000
+    then
+      if (mod (round m) docSize) >= 10
+        then
+          splitDoc' splt doc
+        else
+          V.fromList [doc]
+    else
+      V.fromList [doc]
   where
     splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
     splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
@@ -144,7 +152,7 @@ unIntOrDec :: IntOrDec -> Int
 unIntOrDec (IntOrDec i) = i
 instance FromField IntOrDec where
   parseField s = case runParser (parseField s :: Parser Int) of
-    Left _err -> IntOrDec . floor <$> (parseField s :: Parser Double)
+    Left _err -> IntOrDec <$> floor <$> (parseField s :: Parser Double)
    Right n -> pure $ IntOrDec n
 instance ToField IntOrDec where
   toField (IntOrDec i) = toField i
@@ -245,15 +253,15 @@ readByteStringStrict :: (FromNamedRecord a)
                      -> Delimiter
                      -> BS.ByteString
                      -> Either Text (Header, Vector a)
-readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict
+readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
 ------------------------------------------------------------------------
 -- | TODO use readFileLazy
 readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
 readCSVFile fp = do
-  result <- readCsvLazyBS Comma <$> BL.readFile fp
+  result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
   case result of
-    Left _err -> readCsvLazyBS Tab <$> BL.readFile fp
+    Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
     Right res -> pure $ Right res
@@ -374,6 +382,8 @@ csvHal2doc (CsvHal { .. }) =
   HyperdataDocument { _hd_bdd = Just "CsvHal"
       , _hd_doi = Just csvHal_doiId_s
     , _hd_url = Just csvHal_url
+   , _hd_uniqId = Nothing
+   , _hd_uniqIdBdd = Nothing
  , _hd_page = Nothing
  , _hd_title = Just csvHal_title
  , _hd_authors = Just csvHal_authors
@@ -397,6 +407,8 @@ csv2doc (CsvDoc { .. })
   = HyperdataDocument { _hd_bdd = Just "CsvHal"
       , _hd_doi = Nothing
     , _hd_url = Nothing
+   , _hd_uniqId = Nothing
+   , _hd_uniqIdBdd = Nothing
  , _hd_page = Nothing
  , _hd_title = Just csv_title
  , _hd_authors = Just csv_authors
@@ -422,10 +434,10 @@ csv2doc (CsvDoc { .. })
 parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
 parseHal fp = do
   r <- readCsvHal fp
-  pure $ V.toList . V.map csvHal2doc . snd <$> r
+  pure $ (V.toList . V.map csvHal2doc . snd) <$> r
 parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
-parseHal' bs = V.toList . V.map csvHal2doc . snd <$> readCsvHalLazyBS bs
+parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -443,7 +455,7 @@ parseCsv' bs = do ...@@ -443,7 +455,7 @@ parseCsv' bs = do
result = case readCsvLazyBS Comma bs of result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs Left _err -> readCsvLazyBS Tab bs
Right res -> Right res Right res -> Right res
V.toList . V.map csv2doc . snd <$> result (V.toList . V.map csv2doc . snd) <$> result
parseCsvC :: BL.ByteString parseCsvC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ()) -> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
......
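Note on the splitDoc hunk above: the change re-expands the guard that decides whether a long CSV document gets split into smaller contexts (paragraphs, sentences, characters). A minimal sketch of that rule, with the 1000-character threshold and the modulo test taken from the diff; `shouldSplit` itself is a hypothetical helper, not code from this commit:

```haskell
-- Hypothetical restatement of the guard in splitDoc: a document is split
-- into smaller contexts only when its abstract exceeds 1000 characters
-- and the corpus mean size passes the modulo test visible in the diff.
shouldSplit :: Double  -- ^ mean document size of the corpus (the Mean argument)
            -> Int     -- ^ length of this document's abstract
            -> Bool
shouldSplit m docSize = docSize > 1000 && (round m `mod` docSize) >= 10
```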
...@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab ( ...@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where ) where
import Data.Aeson ( FromJSON(parseJSON), decode, (.:), (.:?), withObject ) import Data.Aeson
import Data.ByteString.Lazy qualified as DBL import Data.ByteString.Lazy qualified as DBL
import Data.Text qualified as DT import Data.Text qualified as DT
import Data.Time import Data.Time
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
data Issue = Issue { _issue_id :: !Int data Issue = Issue { _issue_id :: !Int
...@@ -42,6 +42,8 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument ...@@ -42,6 +42,8 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing { _hd_bdd = Nothing
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = Just (_issue_title issue) , _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing , _hd_authors = Nothing
......
...@@ -31,8 +31,8 @@ import Data.ByteString.Lazy qualified as DBL ...@@ -31,8 +31,8 @@ import Data.ByteString.Lazy qualified as DBL
import Data.JsonStream.Parser qualified as P import Data.JsonStream.Parser qualified as P
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB ( ReadFile(..) ) import Gargantext.Database.GargDB
import Gargantext.Prelude import Gargantext.Prelude
data GrandDebatReference = GrandDebatReference data GrandDebatReference = GrandDebatReference
...@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference ...@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference
, createdAt :: !(Maybe Text) , createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text) , publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text) , updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool) , trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text) , trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text) , authorId :: !(Maybe Text)
, authorType :: !(Maybe Text) , authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text) , authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse]) , responses :: !(Maybe [GrandDebatResponse])
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -77,6 +77,8 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -77,6 +77,8 @@ instance ToHyperdataDocument GrandDebatReference
HyperdataDocument { _hd_bdd = Just "GrandDebat" HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id , _hd_doi = id
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = title , _hd_title = title
, _hd_authors = authorType , _hd_authors = authorType
...@@ -92,10 +94,12 @@ instance ToHyperdataDocument GrandDebatReference ...@@ -92,10 +94,12 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR } , _hd_language_iso2 = Just $ Text.pack $ show FR }
where where
toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence) toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> "" Nothing -> ""
Just r' -> if Text.length r' > 10 then r' else "" Just r' -> case Text.length r' > 10 of
True -> r'
False -> ""
instance ReadFile [GrandDebatReference] instance ReadFile [GrandDebatReference]
where where
......
...@@ -20,14 +20,14 @@ TODO: ...@@ -20,14 +20,14 @@ TODO:
module Gargantext.Core.Text.Corpus.Parsers.Isidore where module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens ( (^.), (.~) ) import Control.Lens hiding (contains)
import Data.ByteString.Lazy (ByteString) import Data.ByteString.Lazy (ByteString)
import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) ) import Data.RDF hiding (triple, Query)
import Data.Text qualified as T import Data.Text qualified as T
import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent ) import Database.HSparql.Connection
import Database.HSparql.QueryGenerator import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (ByteString) import Gargantext.Prelude hiding (ByteString)
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody) import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude qualified import Prelude qualified
...@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing ...@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == T.toLower (show l) then Just x else Nothing unbound l (Bound (LNode (PlainLL x l'))) = if l' == (T.toLower $ show l) then Just x else Nothing
unbound _ _ = Nothing unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
...@@ -123,6 +123,8 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract ...@@ -123,6 +123,8 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
HyperdataDocument { _hd_bdd = Just "Isidore" HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing , _hd_doi = Nothing
, _hd_url = unbound l link' , _hd_url = unbound l link'
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = unbound l title , _hd_title = unbound l title
, _hd_authors = unbound l authors , _hd_authors = unbound l authors
......
...@@ -19,10 +19,11 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where ...@@ -19,10 +19,11 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (length) import Gargantext.Prelude hiding (length, show)
import ISTEX.Client qualified as ISTEX import ISTEX.Client qualified as ISTEX
import Protolude
-- | TODO remove dateSplit here -- | TODO remove dateSplit here
...@@ -36,11 +37,13 @@ toDoc la (ISTEX.Document i t a ab d s) = do ...@@ -36,11 +37,13 @@ toDoc la (ISTEX.Document i t a ab d s) = do
pure $ HyperdataDocument { _hd_bdd = Just "Istex" pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i , _hd_doi = Just i
, _hd_url = Nothing , _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing , _hd_page = Nothing
, _hd_title = t , _hd_title = t
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a) , _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concatMap ISTEX._author_affiliations a) , _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s) , _hd_source = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_abstract = ab , _hd_abstract = ab
, _hd_publication_date = fmap (T.pack . show) utctime , _hd_publication_date = fmap (T.pack . show) utctime
, _hd_publication_year = pub_year , _hd_publication_year = pub_year
......
...@@ -21,11 +21,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where ...@@ -21,11 +21,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where
import Control.Lens (makeLenses, (^.) ) import Control.Lens (makeLenses, (^.) )
import Data.List qualified as List import Data.List qualified as List
import Data.Text (concat) import Data.Text (concat)
import Database.HSparql.Connection ( BindingValue, EndPoint, selectQueryRaw ) import Database.HSparql.Connection
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit) import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound) import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler ( crawlPage ) import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (concat) import Gargantext.Prelude hiding (concat)
import Prelude qualified import Prelude qualified
...@@ -57,9 +57,11 @@ wikiPageToDocument m wr = do ...@@ -57,9 +57,11 @@ wikiPageToDocument m wr = do
let bdd = Just "wikidata" let bdd = Just "wikidata"
doi = Nothing doi = Nothing
url = wr ^. wr_url url = (wr ^. wr_url)
uniqId = Nothing
uniqIdBdd = Nothing
page = Nothing page = Nothing
title = wr ^. wr_title title = (wr ^. wr_title)
authors = Nothing authors = Nothing
institutes = Nothing institutes = Nothing
source = Nothing source = Nothing
...@@ -81,6 +83,8 @@ wikiPageToDocument m wr = do ...@@ -81,6 +83,8 @@ wikiPageToDocument m wr = do
pure $ HyperdataDocument { _hd_bdd = bdd pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi , _hd_doi = doi
, _hd_url = url , _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page , _hd_page = page
, _hd_title = title , _hd_title = title
, _hd_authors = authors , _hd_authors = authors
......
...@@ -167,7 +167,7 @@ instance Semigroup TokenTag where ...@@ -167,7 +167,7 @@ instance Semigroup TokenTag where
instance Monoid TokenTag where instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing mempty = TokenTag [] empty Nothing Nothing
mconcat = foldl' mappend mempty mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2 -- mappend t1 t2 = (<>) t1 t2
......
...@@ -9,21 +9,22 @@ Portability : POSIX ...@@ -9,21 +9,22 @@ Portability : POSIX
-} -}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.Core.Types.Search where module Gargantext.Core.Types.Search where
import Data.Aeson hiding (defaultTaggedObject) import Data.Aeson hiding (defaultTaggedObject)
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema ) import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Text qualified as Text import Data.Text qualified as Text
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( ContactWhere(..), HyperdataContact(..), ContactWho(..) ) import Gargantext.Database.Admin.Types.Hyperdata (ContactWhere(..), HyperdataContact(..), HyperdataDocument(..), ContactWho(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..)) import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..))
import Gargantext.Defaults qualified as Defaults import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject) import Gargantext.Utils.Aeson (defaultTaggedObject)
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) ) import Test.QuickCheck.Arbitrary
data Row = data Row =
...@@ -92,6 +93,8 @@ data HyperdataRow = ...@@ -92,6 +93,8 @@ data HyperdataRow =
, _hr_source :: !Text , _hr_source :: !Text
, _hr_title :: !Text , _hr_title :: !Text
, _hr_url :: !Text , _hr_url :: !Text
, _hr_uniqId :: !Text
, _hr_uniqIdBdd :: !Text
} }
| HyperdataRowContact { _hr_firstname :: !Text | HyperdataRowContact { _hr_firstname :: !Text
, _hr_lastname :: !Text , _hr_lastname :: !Text
...@@ -145,7 +148,9 @@ instance ToHyperdataRow HyperdataDocument where ...@@ -145,7 +148,9 @@ instance ToHyperdataRow HyperdataDocument where
, _hr_publication_second = fromMaybe 0 _hd_publication_second , _hr_publication_second = fromMaybe 0 _hd_publication_second
, _hr_source = fromMaybe "" _hd_source , _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title , _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url } , _hr_url = fromMaybe "" _hd_url
, _hr_uniqId = fromMaybe "" _hd_uniqId
, _hr_uniqIdBdd = fromMaybe "" _hd_uniqIdBdd }
instance ToHyperdataRow HyperdataContact where instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) = toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
......
...@@ -11,6 +11,7 @@ Portability : POSIX ...@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-} {-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-} {-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
...@@ -51,7 +52,9 @@ type FlowCmdM env err m = ...@@ -51,7 +52,9 @@ type FlowCmdM env err m =
, MonadLogger m , MonadLogger m
) )
type FlowCorpus a = ( UniqParameters a type FlowCorpus a = ( AddUniqId a
, UniqId a
, UniqParameters a
, InsertDb a , InsertDb a
, ExtractNgramsT a , ExtractNgramsT a
, HasText a , HasText a
......
...@@ -16,6 +16,7 @@ Portability : POSIX ...@@ -16,6 +16,7 @@ Portability : POSIX
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
...@@ -31,7 +32,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU ...@@ -31,7 +32,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU
import Gargantext.Core.Text (HasText(..)) import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.UTCTime ( NUTCTime(..) ) import Gargantext.Utils.UTCTime
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
data HyperdataContact = data HyperdataContact =
...@@ -41,6 +42,8 @@ data HyperdataContact = ...@@ -41,6 +42,8 @@ data HyperdataContact =
, _hc_title :: Maybe Text -- TODO remove (only demo) , _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo) , _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text -- TODO UTCTime , _hc_lastValidation :: Maybe Text -- TODO UTCTime
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
instance GQLType HyperdataContact where instance GQLType HyperdataContact where
...@@ -58,7 +61,9 @@ defaultHyperdataContact = ...@@ -58,7 +61,9 @@ defaultHyperdataContact =
, _hc_where = [defaultContactWhere] , _hc_where = [defaultContactWhere]
, _hc_title =Just "Title" , _hc_title =Just "Title"
, _hc_source = Just "Source" , _hc_source = Just "Source"
, _hc_lastValidation = Just "TODO lastValidation date" } , _hc_lastValidation = Just "TODO lastValidation date"
, _hc_uniqIdBdd = Just "DO NOT expose this"
, _hc_uniqId = Just "DO NOT expose this" }
hyperdataContact :: FirstName -> LastName -> HyperdataContact hyperdataContact :: FirstName -> LastName -> HyperdataContact
hyperdataContact fn ln = hyperdataContact fn ln =
...@@ -68,7 +73,9 @@ hyperdataContact fn ln = ...@@ -68,7 +73,9 @@ hyperdataContact fn ln =
, _hc_where = [] , _hc_where = []
, _hc_title = Nothing , _hc_title = Nothing
, _hc_source = Nothing , _hc_source = Nothing
, _hc_lastValidation = Nothing } , _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
-- TOD0 contact metadata (Type is too flat) -- TOD0 contact metadata (Type is too flat)
data ContactMetaData = data ContactMetaData =
...@@ -87,7 +94,9 @@ arbitraryHyperdataContact = ...@@ -87,7 +94,9 @@ arbitraryHyperdataContact =
, _hc_where = [] , _hc_where = []
, _hc_title = Nothing , _hc_title = Nothing
, _hc_source = Nothing , _hc_source = Nothing
, _hc_lastValidation = Nothing } , _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
data ContactWho = data ContactWho =
...@@ -179,7 +188,7 @@ instance ToSchema ContactMetaData where ...@@ -179,7 +188,7 @@ instance ToSchema ContactMetaData where
-- | Arbitrary instances -- | Arbitrary instances
instance Arbitrary HyperdataContact where instance Arbitrary HyperdataContact where
arbitrary = elements [ HyperdataContact Nothing Nothing [] Nothing Nothing Nothing ] arbitrary = elements [HyperdataContact Nothing Nothing [] Nothing Nothing Nothing Nothing Nothing]
-- | Specific Gargantext instance -- | Specific Gargantext instance
instance Hyperdata HyperdataContact instance Hyperdata HyperdataContact
......
...@@ -29,6 +29,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude ...@@ -29,6 +29,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text) data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text) , _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text) , _hd_url :: !(Maybe Text)
, _hd_uniqId :: !(Maybe Text)
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int) , _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text) , _hd_title :: !(Maybe Text)
, _hd_authors :: !(Maybe Text) , _hd_authors :: !(Maybe Text)
...@@ -56,7 +58,7 @@ instance HasText HyperdataDocument ...@@ -56,7 +58,7 @@ instance HasText HyperdataDocument
defaultHyperdataDocument :: HyperdataDocument defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of defaultHyperdataDocument = case decode docExample of
Just hp -> hp Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing Nothing -> HyperdataDocument Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
...@@ -105,8 +107,7 @@ instance ToHyperdataDocument HyperdataDocument ...@@ -105,8 +107,7 @@ instance ToHyperdataDocument HyperdataDocument
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Eq HyperdataDocument where instance Eq HyperdataDocument where
(==) h1 h2 = _hd_title h1 == _hd_title h2 (==) h1 h2 = (==) (_hd_uniqId h1) (_hd_uniqId h2)
&& _hd_abstract h1 == _hd_abstract h2
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Ord HyperdataDocument where instance Ord HyperdataDocument where
...@@ -125,7 +126,7 @@ arbitraryHyperdataDocuments = ...@@ -125,7 +126,7 @@ arbitraryHyperdataDocuments =
] :: [(Text, Text)]) ] :: [(Text, Text)])
where where
toHyperdataDocument' (t1,t2) = toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing (Just t1) HyperdataDocument Nothing Nothing Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing
......
...@@ -57,12 +57,15 @@ the concatenation of the parameters defined by @shaParameters@. ...@@ -57,12 +57,15 @@ the concatenation of the parameters defined by @shaParameters@.
module Gargantext.Database.Query.Table.Node.Document.Insert module Gargantext.Database.Query.Table.Node.Document.Insert
where where
import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, ToJSON) import Data.Aeson (toJSON, ToJSON)
import Data.Text qualified as DT (pack, concat, take, filter, toLower) import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..)) import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field) import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ ( sql ) import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-}) import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid)) import Gargantext.Core (HasDBid(toDBid))
...@@ -90,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery) ...@@ -90,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId] insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p) insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where where
fields = map (QualifiedIdentifier Nothing) inputSqlTypes fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
class InsertDb a class InsertDb a
where where
...@@ -105,18 +108,18 @@ instance InsertDb HyperdataDocument ...@@ -105,18 +108,18 @@ instance InsertDb HyperdataDocument
, toField p , toField p
, toField $ maybe "No Title" (DT.take 255) (_hd_title h) , toField $ maybe "No Title" (DT.take 255) (_hd_title h)
, toField $ _hd_publication_date h -- TODO USE UTCTime , toField $ _hd_publication_date h -- TODO USE UTCTime
-- , (toField . toJSON) (addUniqId h) , (toField . toJSON) (addUniqId h)
] ]
instance InsertDb HyperdataContact instance InsertDb HyperdataContact
where where
insertDb' u p _h = [ toField ("" :: Text) insertDb' u p h = [ toField ("" :: Text)
, toField $ toDBid NodeContact , toField $ toDBid NodeContact
, toField u , toField u
, toField p , toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h) , toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 0 1 1 -- TODO put default date , toField $ jour 0 1 1 -- TODO put default date
-- , (toField . toJSON) (addUniqId h) , (toField . toJSON) (addUniqId h)
] ]
instance ToJSON a => InsertDb (Node a) instance ToJSON a => InsertDb (Node a)
...@@ -191,73 +194,73 @@ class AddUniqId a ...@@ -191,73 +194,73 @@ class AddUniqId a
where where
addUniqId :: a -> a addUniqId :: a -> a
-- instance AddUniqId HyperdataDocument
-- where
-- addUniqId = addUniqIdsDoc
-- where
-- addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
-- addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
-- $ set hd_uniqId (Just shaUni) doc
-- where
-- shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
-- shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
-- shaParametersDoc :: [HyperdataDocument -> Text]
-- shaParametersDoc = [ filterText . maybeText . _hd_title
-- , filterText . maybeText . _hd_abstract
-- , filterText . maybeText . _hd_source
-- -- , \d -> maybeText (_hd_publication_date d)
-- ]
class UniqParameters a class UniqParameters a
where where
uniqParameters :: a -> Text uniqParameters :: ParentId -> a -> Text
instance AddUniqId HyperdataDocument
where
addUniqId = addUniqIdsDoc
where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
-- , \d -> maybeText (_hd_publication_date d)
]
instance UniqParameters HyperdataDocument instance UniqParameters HyperdataDocument
where where
uniqParameters h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h] uniqParameters _ h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
instance UniqParameters HyperdataContact instance UniqParameters HyperdataContact
where where
uniqParameters _ = "" uniqParameters _ _ = ""
instance UniqParameters (Node a) instance UniqParameters (Node a)
where where
uniqParameters _ = undefined uniqParameters _ _ = undefined
filterText :: Text -> Text filterText :: Text -> Text
filterText = DT.toLower . DT.filter isAlphaNum filterText = DT.toLower . (DT.filter isAlphaNum)
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a) instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where where
newHash = "\\x" <> hash (uniqParameters h) newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h)
--------------------------------------------------------------------------- ---------------------------------------------------------------------------
-- * Uniqueness of document definition -- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests) -- TODO factorize with above (use the function below for tests)
-- instance AddUniqId HyperdataContact instance AddUniqId HyperdataContact
-- where where
-- addUniqId = addUniqIdsContact addUniqId = addUniqIdsContact
-- addUniqIdsContact :: HyperdataContact -> HyperdataContact addUniqIdsContact :: HyperdataContact -> HyperdataContact
-- addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd) addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
-- $ set hc_uniqId (Just shaUni) hc $ set (hc_uniqId ) (Just shaUni) hc
-- where where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
-- shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact) -- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)]
-- -- | TODO add more shaparameters shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
-- shaParametersContact :: [HyperdataContact -> Text] , \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
-- shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName ) , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
-- , maybeText . view (hc_who . _Just . cw_lastName ) ]
maybeText :: Maybe Text -> Text maybeText :: Maybe Text -> Text
...@@ -283,7 +286,7 @@ instance ToNode HyperdataDocument where ...@@ -283,7 +286,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node -- TODO better Node
instance ToNode HyperdataContact where instance ToNode HyperdataContact where
toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h
where where
date = jour 2020 01 01 date = jour 2020 01 01
......
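Note on the Insert.hs hunk above: the commit restores the `AddUniqId` machinery, so `_hd_uniqId` / `_hd_uniqIdBdd` are again filled with a hash over normalised document fields (title, abstract, source). A self-contained sketch of that idea, assuming a SHA-256 digest from `cryptonite`; the record and helper names below are illustrative and not the project's exact `hash` utility:

```haskell
{-# LANGUAGE OverloadedStrings #-}
-- Hypothetical sketch: derive a stable unique id for a document by hashing
-- a normalised concatenation of its title, abstract and source, mirroring
-- the filterText/uniqParameters pattern visible in the diff.
import Crypto.Hash (hashWith, SHA256 (..))
import Data.ByteArray.Encoding (Base (Base16), convertToBase)
import Data.Char (isAlphaNum)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE

data Doc = Doc { docTitle    :: Maybe Text
               , docAbstract :: Maybe Text
               , docSource   :: Maybe Text }

-- Lower-case and keep only alphanumeric characters, so spacing and
-- punctuation differences do not change the resulting hash.
normalise :: Maybe Text -> Text
normalise = T.toLower . T.filter isAlphaNum . fromMaybe ""

-- Hex-encoded SHA-256 over the concatenated, normalised fields.
uniqId :: Doc -> Text
uniqId d = TE.decodeUtf8
         . convertToBase Base16
         . hashWith SHA256
         . TE.encodeUtf8
         $ T.concat [ normalise (docTitle d)
                    , normalise (docAbstract d)
                    , normalise (docSource d) ]
```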
...@@ -65,6 +65,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ| ...@@ -65,6 +65,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 :: HyperdataDocument exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ| exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"" { "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv" , "bdd": "Arxiv"
, "publication_day":6 , "publication_day":6
, "language_iso2":"EN" , "language_iso2":"EN"
...@@ -89,6 +90,7 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ| ...@@ -89,6 +90,7 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
, "url": "http://arxiv.org/pdf/1405.3072v2" , "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers" , "title": "Haskell for OCaml programmers"
, "source": "" , "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss, Herbert Ballerina" , "authors": "Raphael Poss, Herbert Ballerina"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. " , "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": "" , "institutes": ""
...@@ -106,6 +108,7 @@ exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ| ...@@ -106,6 +108,7 @@ exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
, "url": "http://arxiv.org/pdf/1407.5670v1" , "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "Rust for functional programmers" , "title": "Rust for functional programmers"
, "source": "" , "source": ""
, "uniqId": "1407.5670v1"
, "authors": "Raphael Poss" , "authors": "Raphael Poss"
, "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": "" , "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
, "language_iso2": "EN" , "language_iso2": "EN"
......