Commit f4284b4e authored by Alexandre Delanoë

Revert "Merge remote-tracking branch 'origin/169-dev-docs-uniq-id' into dev"

This reverts commit efbe327f, reversing
changes made to 6d9bcd07.
parent cb1e5947
......@@ -25,10 +25,10 @@ import Data.Morpheus.Types
import Data.Text (pack)
import Data.Time.Format.ISO8601 (iso8601Show)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude (GargM)
import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..))
import Gargantext.Database.Prelude (CmdCommon)
import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS)
......@@ -71,6 +71,8 @@ data HyperdataRowDocumentGQL =
, hrd_source :: Text
, hrd_title :: Text
, hrd_url :: Text
, hrd_uniqId :: Text
, hrd_uniqIdBdd :: Text
} deriving (Generic, GQLType, Show)
data NodeContextGQL = NodeContextGQL
......@@ -214,6 +216,8 @@ toHyperdataRowDocumentGQL hyperdata =
, hrd_source = _hr_source
, hrd_title = _hr_title
, hrd_url = _hr_url
, hrd_uniqId = _hr_uniqId
, hrd_uniqIdBdd = _hr_uniqIdBdd
}
HyperdataRowContact { } -> Nothing
......
......@@ -22,21 +22,21 @@ import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.NodeStory
import Gargantext.Core.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
......@@ -51,7 +51,9 @@ getCorpus :: CorpusId
getCorpus cId lId nt' = do
let
nt = fromMaybe NgramsTerms nt'
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
listId <- case lId of
Nothing -> defaultList cId
......@@ -73,10 +75,10 @@ getCorpus cId lId nt' = do
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash :: Context HyperdataDocument -> Set Text -> Text
d_hash _a b = hash [ -- fromMaybe "" (_hd_uniqId $ _context_hyperdata a),
hash b
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
, hash b
]
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".json")
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
$ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
......
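Side note on the restored `d_hash` above: the export hash goes back to folding the document's `_hd_uniqId` together with the hash of its ngram terms, instead of hashing the terms alone. A minimal sketch of that combination, assuming a `hashText` stand-in for `Gargantext.Prelude.Crypto.Hash.hash` and plain `Text` inputs:

{-# LANGUAGE OverloadedStrings #-}
import Data.Hashable (hash)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T

-- Stand-in for the project's cryptographic hash helper (Text -> Text).
hashText :: Text -> Text
hashText = T.pack . show . hash

-- Sketch of the restored behaviour: combine the uniq id with the hash of
-- the document's ngram terms, mirroring `d_hash a b = hash [uniqId, hash b]`.
docHash :: Maybe Text -> Set Text -> Text
docHash uniqId terms =
  hashText $ T.concat [ fromMaybe "" uniqId
                      , hashText (T.unwords (Set.toList terms)) ]

main :: IO ()
main = print $ docHash (Just "1405.3072v3") (Set.fromList ["haskell", "ocaml"])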
......@@ -24,7 +24,7 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3)
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
......@@ -40,7 +40,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
......@@ -53,7 +54,7 @@ import Prelude qualified
langToSearx :: Lang -> Text
langToSearx All = "en-US"
langToSearx x = Text.toLower acronym <> "-" <> acronym
langToSearx x = (Text.toLower acronym) <> "-" <> acronym
where
acronym = show x
......@@ -136,7 +137,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
-- docs :: [Either Text HyperdataDocument]
let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
let docs' = mapMaybe rightToMaybe docs
let docs' = catMaybes $ rightToMaybe <$> docs
{-
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $
......@@ -214,14 +215,16 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
Right HyperdataDocument { _hd_bdd = Just "Searx"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just _sr_title
, _hd_authors = Nothing
, _hd_institutes = Nothing
, _hd_source = Just _sr_engine
, _hd_abstract = _sr_content
, _hd_publication_date = T.pack Prelude.. formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" Prelude.<$> mDate
, _hd_publication_year = fromIntegral Prelude.. sel1 Prelude.<$> mGregorian
, _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate
, _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
, _hd_publication_month = sel2 <$> mGregorian
, _hd_publication_day = sel3 <$> mGregorian
, _hd_publication_hour = Nothing
......
......@@ -22,22 +22,22 @@ import Data.Swagger (ToSchema)
import Data.Text qualified as T
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude ( GargM )
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
import Servant
data DocumentUpload = DocumentUpload
......@@ -108,6 +108,8 @@ documentUpload nId doc = do
let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc
, _hd_authors = Just $ view du_authors doc
......
......@@ -10,22 +10,23 @@ Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Conduit ( yieldMany )
import Conduit
import Control.Lens ((^.))
import Data.Aeson ( genericParseJSON, defaultOptions, genericToJSON, FromJSON(parseJSON), ToJSON(toJSON) )
import Data.Aeson
import Data.List qualified as List
import Data.Swagger ( ToSchema )
import Data.Swagger
import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Errors.Types
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Prelude (GargM)
import Gargantext.Core (Lang(..))
......@@ -38,13 +39,13 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
import Servant
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
......@@ -105,7 +106,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
pure (node, contents)
) frameWrites
let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE
......@@ -158,6 +159,8 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just authors'
......
......@@ -16,13 +16,13 @@ Here is writtent a common interface.
module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
where
import Codec.Serialise ( Serialise, deserialise )
import Codec.Serialise
import Data.ByteString.Lazy qualified as BL
import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Csv
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) )
import Gargantext.Core.Text.Corpus.Parsers.CSV
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
......@@ -156,9 +156,11 @@ imtUser2gargContact (IMTUser { id
, _hc_where = [ou]
, _hc_title = title
, _hc_source = entite
, _hc_lastValidation = date_modification }
, _hc_lastValidation = date_modification
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
where
title = (<>) <$> fmap (\p -> p <> " ") prenom <*> nom
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
qui = ContactWho { _cw_id = id
, _cw_firstName = prenom
, _cw_lastName = nom
......@@ -180,7 +182,7 @@ imtUser2gargContact (IMTUser { id
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList' Nothing = []
toList' (Just x) = [x]
......@@ -15,6 +15,7 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
......@@ -24,6 +25,14 @@ class UniqId a
where
uniqId :: Lens' a (Maybe Hash)
instance UniqId HyperdataDocument
where
uniqId = hd_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
instance UniqId (Node a)
where
uniqId = node_hash_id
......
......@@ -19,13 +19,13 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
) where
import Arxiv qualified as Arxiv
import Conduit ( ConduitT, (.|), mapC, takeC )
import Conduit
import Data.Text (unpack)
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax
......@@ -46,7 +46,7 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> transformAST sub <*> transformAST (BConst (Positive term))
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2
......@@ -88,7 +88,7 @@ toDoc l (Arxiv.Result { abstract
, authors = aus
--, categories
, doi
-- , id
, id
, journal
--, primaryCategory
, publication_date
......@@ -99,6 +99,8 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ Text.pack title
, _hd_authors = authors aus
......@@ -116,10 +118,13 @@ toDoc l (Arxiv.Result { abstract
where
authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing
authors aus' = Just $ Text.intercalate ", "
$ map (Text.pack . Ax.auName) aus'
authors aus' = Just $ (Text.intercalate ", ")
$ map Text.pack
$ map Ax.auName aus'
institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing
institutes aus' = Just $ Text.intercalate ", "
$ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
institutes aus' = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ map Text.pack
$ map Ax.auFil aus'
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.EPO where
import Conduit ( ConduitT, (.|), mapC )
import Conduit
import Data.LanguageCodes (ISO639_1)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
......@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO
import EPO.API.Client.Implementation qualified as EPO
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Network.URI (parseURI)
import Protolude
import Servant.Client.Core (ClientError(ConnectionError))
......@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
......@@ -48,6 +48,8 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = id
, _hd_uniqIdBdd = id
, _hd_page = Nothing
, _hd_title = Map.lookup lang titles
, _hd_authors = authors_
......@@ -64,10 +66,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang }
where
authors_ = if null authors
authors_ = if authors == []
then Nothing
else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
......@@ -12,12 +12,14 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal
where
import Conduit ( ConduitT, (.|), mapMC )
import Conduit
import Data.Either
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Text (pack)
import Data.Maybe
import Data.Text (pack, intercalate)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate)
import HAL qualified as HAL
......@@ -28,7 +30,7 @@ import Servant.Client (ClientError)
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs
either (panicTrace . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
......@@ -43,15 +45,17 @@ toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h
let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = unwords _corpus_abstract
let abstractDefault = intercalate " " _corpus_abstract
let abstract = case la of
Nothing -> abstractDefault
Just l -> maybe abstractDefault unwords (Map.lookup l _corpus_abstract_lang_map)
Just l -> fromMaybe abstractDefault (intercalate " " <$> Map.lookup l _corpus_abstract_lang_map)
pure HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show _corpus_docid
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ unwords _corpus_title
, _hd_title = Just $ intercalate " " _corpus_title
, _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
, _hd_source = Just $ maybe "Nothing" identity _corpus_source
......
......@@ -18,12 +18,12 @@ import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (get)
import Isidore qualified as Isidore
import Isidore.Client
import Servant.Client ( ClientError(DecodeFailure) )
import Servant.Client
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
......@@ -40,7 +40,7 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (isidoreToDoc la) (toIsidoreDocs iDocs)
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
......@@ -54,7 +54,7 @@ isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = _name fn <> ", " <> _name ln
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
......@@ -66,19 +66,21 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument
{ _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = u
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ cleanText $ langText t
, _hd_authors = creator2text <$> as
, _hd_institutes = Nothing
, _hd_source = Just $ maybe "Nothing" (identity . _sourceName) s
, _hd_abstract = cleanText . langText <$> a
, _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
, _hd_abstract = cleanText <$> langText <$> a
, _hd_publication_date = fmap (Text.pack . show) utcTime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
......
......@@ -9,15 +9,15 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit ( ConduitT, (.|), mapC, takeC )
import Conduit
import Data.LanguageCodes qualified as ISO639
import Data.Text qualified as T
import qualified Data.Text as T
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Protolude
import OpenAlex qualified as OA
import OpenAlex.Types qualified as OA
import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
import Servant.Client (ClientError)
......@@ -37,6 +37,8 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = Just id
, _hd_uniqIdBdd = Just id
, _hd_page = firstPage biblio
, _hd_title = title
, _hd_authors = authors authorships
......@@ -53,25 +55,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language }
where
firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page
authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
where
getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text
source = getSource =<< primary_location
source = maybe Nothing getSource primary_location
where
getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
......@@ -20,13 +20,13 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
)
where
import Conduit ( ConduitT, (.|), mapC, takeC )
import Conduit
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import PUBMED qualified as PubMed
......@@ -64,7 +64,7 @@ convertQuery q = ESearch (interpretQuery q transformAST)
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> transformAST sub <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
......@@ -108,11 +108,14 @@ get apiKey q l = do
-- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
toDoc l (PubMedDoc.PubMed { pubmed_id
, pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = authors aus
......@@ -130,14 +133,16 @@ toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
where
authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing
authors au = Just $ Text.intercalate ", "
$ mapMaybe (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
authors au = Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: [PubMedDoc.Author] -> Maybe Text
institutes [] = Nothing
institutes au = Just $ Text.intercalate ", "
$ map (Text.replace ", " " - ")
$ mapMaybe PubMedDoc.affiliation au
institutes au = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
abstract :: [Text] -> Maybe Text
......
......@@ -46,7 +46,7 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Zip qualified as UZip
......@@ -88,10 +88,10 @@ parseFormatC :: MonadBaseControl IO m
-> m (Either Text (Integer, ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC CsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep
......@@ -120,15 +120,15 @@ parseFormatC Iramuteq Plain bs = do
, yieldMany docs
.| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
.| mapMC ((toDoc Iramuteq) . (map (second (DT.replace "_" " "))))
)
)
<$> eDocs
parseFormatC JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
fileNames <- filter (filterZIPFileNameP ft) <$> DM.keys <$> getEntries
printDebug "[parseFormatC] fileNames" fileNames
fileContents <- mapM getEntry fileNames
--printDebug "[parseFormatC] fileContents" fileContents
......@@ -145,19 +145,19 @@ parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
let contents' = snd <$> contents
let totalLength = sum lenghts
pure $ Right ( totalLength
, void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[parseFormatC] doc")
, sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs
parseFormatC _ _ _ = pure $ Left "Not implemented"
filterZIPFileNameP :: FileType -> EntrySelector -> Bool
filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
(unEntrySelector f /= "manifest.json")
((unEntrySelector f) /= "manifest.json")
filterZIPFileNameP _ _ = True
etale :: [HyperdataDocument] -> [HyperdataDocument]
etale = concatMap etale'
etale = concat . (map etale')
where
etale' :: HyperdataDocument -> [HyperdataDocument]
etale' h = map (\t -> h { _hd_abstract = Just t })
......@@ -232,6 +232,8 @@ toDoc ff d = do
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = lookup "title" d
, _hd_authors = lookup "authors" d
......@@ -291,7 +293,7 @@ runParser format text = pure $ runParser' format text
runParser' :: FileType
-> DB.ByteString
-> Either Text [[(DB.ByteString, DB.ByteString)]]
-> (Either Text [[(DB.ByteString, DB.ByteString)]])
runParser' format text = first DT.pack $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
......@@ -315,5 +317,5 @@ clean txt = DBC.map clean' txt
--
splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
splitOn Authors (Just "WOS") = DT.splitOn "; "
splitOn _ _ = DT.splitOn ", "
splitOn Authors (Just "WOS") = (DT.splitOn "; ")
splitOn _ _ = (DT.splitOn ", ")
......@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO ()
book2csv n f_in f_out = do
files <- filesOf f_in
texts <- readPublis f_in files
let publis = concatMap (file2publi n) texts
let docs = zipWith publiToHyperdata [1..] publis
let publis = List.concat $ map (file2publi n) texts
let docs = map (\(y,p) -> publiToHyperdata y p) $ List.zip [1..] publis
DBL.writeFile f_out (hyperdataDocument2csv docs)
filesOf :: FileDir -> IO [FilePath]
......@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam
<$> getDirectoryContents fd
readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)]
readPublis fd = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt))
readPublis fd fps = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt)) fps
------------------------------------------------------------------------
-- Main Types
......@@ -63,7 +63,7 @@ type FileDir = FilePath
---------------------------------------------------------------------
file2publi :: Int -> (FilePath, Text) -> [Publi]
file2publi n (fp,theText) = map (uncurry (Publi authors source)) theTexts
file2publi n (fp,theText) = map (\(t,txt) -> Publi authors source t txt) theTexts
where
theTexts = text2titleParagraphs n theText
FileInfo authors source = fileNameInfo fp
......@@ -81,6 +81,8 @@ publiToHyperdata y (Publi a s t txt) =
HyperdataDocument { _hd_bdd = Just "Book File"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just (DT.concat a)
......
......@@ -14,7 +14,8 @@ CSV parser for Gargantext corpus files.
module Gargantext.Core.Text.Corpus.Parsers.CSV where
import Conduit ( ConduitT, (.|), yieldMany, mapC )
import Conduit
import Control.Applicative
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL
import Data.Csv
......@@ -23,9 +24,9 @@ import Data.Text qualified as T
import Data.Time.Segment (jour)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text ( sentences, unsentences )
import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Core.Text
import Gargantext.Core.Text.Context
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (length, show)
import Protolude
......@@ -59,6 +60,8 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
HyperdataDocument { _hd_bdd = Just "CSV"
, _hd_doi = Just . pack . show $ did
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just dt
, _hd_authors = Nothing
......@@ -90,11 +93,11 @@ toDocs v = V.toList
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= V.fromList [Paragraphs 1, Sentences 3, Chars 3]
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
---------------------------------------------------------------
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs = V.map fromDocs'
fromDocs docs = V.map fromDocs' docs
where
fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
, csv_source = d_source
......@@ -108,11 +111,16 @@ fromDocs = V.map fromDocs'
-- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc =
let docSize = (T.length $ csv_abstract doc) in
if (docSize > 1000) && (mod (round m) docSize >= 10)
then splitDoc' splt doc
else V.fromList [doc]
splitDoc m splt doc = let docSize = (T.length $ csv_abstract doc) in
if docSize > 1000
then
if (mod (round m) docSize) >= 10
then
splitDoc' splt doc
else
V.fromList [doc]
else
V.fromList [doc]
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
......@@ -144,7 +152,7 @@ unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i
instance FromField IntOrDec where
parseField s = case runParser (parseField s :: Parser Int) of
Left _err -> IntOrDec . floor <$> (parseField s :: Parser Double)
Left _err -> IntOrDec <$> floor <$> (parseField s :: Parser Double)
Right n -> pure $ IntOrDec n
instance ToField IntOrDec where
toField (IntOrDec i) = toField i
......@@ -245,15 +253,15 @@ readByteStringStrict :: (FromNamedRecord a)
-> Delimiter
-> BS.ByteString
-> Either Text (Header, Vector a)
readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict
readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
readCSVFile fp = do
result <- readCsvLazyBS Comma <$> BL.readFile fp
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
case result of
Left _err -> readCsvLazyBS Tab <$> BL.readFile fp
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
Right res -> pure $ Right res
......@@ -374,6 +382,8 @@ csvHal2doc (CsvHal { .. }) =
HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Just csvHal_doiId_s
, _hd_url = Just csvHal_url
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csvHal_title
, _hd_authors = Just csvHal_authors
......@@ -397,6 +407,8 @@ csv2doc (CsvDoc { .. })
= HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csv_title
, _hd_authors = Just csv_authors
......@@ -422,10 +434,10 @@ csv2doc (CsvDoc { .. })
parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
parseHal fp = do
r <- readCsvHal fp
pure $ V.toList . V.map csvHal2doc . snd <$> r
pure $ (V.toList . V.map csvHal2doc . snd) <$> r
parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
parseHal' bs = V.toList . V.map csvHal2doc . snd <$> readCsvHalLazyBS bs
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
......@@ -443,7 +455,7 @@ parseCsv' bs = do
result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs
Right res -> Right res
V.toList . V.map csv2doc . snd <$> result
(V.toList . V.map csv2doc . snd) <$> result
parseCsvC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
......
......@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where
import Data.Aeson ( FromJSON(parseJSON), decode, (.:), (.:?), withObject )
import Data.Aeson
import Data.ByteString.Lazy qualified as DBL
import Data.Text qualified as DT
import Data.Time
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
data Issue = Issue { _issue_id :: !Int
......@@ -42,6 +42,8 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing
......
......@@ -31,8 +31,8 @@ import Data.ByteString.Lazy qualified as DBL
import Data.JsonStream.Parser qualified as P
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB ( ReadFile(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB
import Gargantext.Prelude
data GrandDebatReference = GrandDebatReference
......@@ -43,14 +43,14 @@ data GrandDebatReference = GrandDebatReference
, createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text)
, authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse])
}
deriving (Show, Generic)
......@@ -77,6 +77,8 @@ instance ToHyperdataDocument GrandDebatReference
HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = title
, _hd_authors = authorType
......@@ -92,10 +94,12 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR }
where
toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence)
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> ""
Just r' -> if Text.length r' > 10 then r' else ""
Just r' -> case Text.length r' > 10 of
True -> r'
False -> ""
instance ReadFile [GrandDebatReference]
where
......
......@@ -20,14 +20,14 @@ TODO:
module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens ( (^.), (.~) )
import Control.Lens hiding (contains)
import Data.ByteString.Lazy (ByteString)
import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) )
import Data.RDF hiding (triple, Query)
import Data.Text qualified as T
import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent )
import Database.HSparql.Connection
import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (ByteString)
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude qualified
......@@ -115,7 +115,7 @@ unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == T.toLower (show l) then Just x else Nothing
unbound l (Bound (LNode (PlainLL x l'))) = if l' == (T.toLower $ show l) then Just x else Nothing
unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
......@@ -123,6 +123,8 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = unbound l link'
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = unbound l title
, _hd_authors = unbound l authors
......
......@@ -19,10 +19,11 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where
import Data.Text qualified as T
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (length)
import Gargantext.Prelude hiding (length, show)
import ISTEX.Client qualified as ISTEX
import Protolude
-- | TODO remove dateSplit here
......@@ -36,11 +37,13 @@ toDoc la (ISTEX.Document i t a ab d s) = do
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concatMap ISTEX._author_affiliations a)
, _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_abstract = ab
, _hd_publication_date = fmap (T.pack . show) utctime
, _hd_publication_year = pub_year
......
......@@ -21,11 +21,11 @@ module Gargantext.Core.Text.Corpus.Parsers.Wikidata where
import Control.Lens (makeLenses, (^.) )
import Data.List qualified as List
import Data.Text (concat)
import Database.HSparql.Connection ( BindingValue, EndPoint, selectQueryRaw )
import Database.HSparql.Connection
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler ( crawlPage )
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (concat)
import Prelude qualified
......@@ -57,9 +57,11 @@ wikiPageToDocument m wr = do
let bdd = Just "wikidata"
doi = Nothing
url = wr ^. wr_url
url = (wr ^. wr_url)
uniqId = Nothing
uniqIdBdd = Nothing
page = Nothing
title = wr ^. wr_title
title = (wr ^. wr_title)
authors = Nothing
institutes = Nothing
source = Nothing
......@@ -81,6 +83,8 @@ wikiPageToDocument m wr = do
pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page
, _hd_title = title
, _hd_authors = authors
......
......@@ -167,7 +167,7 @@ instance Semigroup TokenTag where
instance Monoid TokenTag where
mempty = TokenTag [] empty Nothing Nothing
mconcat = foldl' mappend mempty
mconcat = foldl mappend mempty
-- mappend t1 t2 = (<>) t1 t2
......
......@@ -9,21 +9,22 @@ Portability : POSIX
-}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.Core.Types.Search where
import Data.Aeson hiding (defaultTaggedObject)
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Text qualified as Text
import Data.Time (UTCTime)
import Gargantext.Core.Utils.Prefix (dropPrefix, unCapitalize, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( ContactWhere(..), HyperdataContact(..), ContactWho(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Database.Admin.Types.Hyperdata (ContactWhere(..), HyperdataContact(..), HyperdataDocument(..), ContactWho(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Facet.Types (Facet(..), FacetDoc, FacetPaired(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude
import Gargantext.Utils.Aeson (defaultTaggedObject)
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
import Test.QuickCheck.Arbitrary
data Row =
......@@ -92,6 +93,8 @@ data HyperdataRow =
, _hr_source :: !Text
, _hr_title :: !Text
, _hr_url :: !Text
, _hr_uniqId :: !Text
, _hr_uniqIdBdd :: !Text
}
| HyperdataRowContact { _hr_firstname :: !Text
, _hr_lastname :: !Text
......@@ -145,7 +148,9 @@ instance ToHyperdataRow HyperdataDocument where
, _hr_publication_second = fromMaybe 0 _hd_publication_second
, _hr_source = fromMaybe "" _hd_source
, _hr_title = fromMaybe "Title" _hd_title
, _hr_url = fromMaybe "" _hd_url }
, _hr_url = fromMaybe "" _hd_url
, _hr_uniqId = fromMaybe "" _hd_uniqId
, _hr_uniqIdBdd = fromMaybe "" _hd_uniqIdBdd }
instance ToHyperdataRow HyperdataContact where
toHyperdataRow (HyperdataContact { _hc_who = Just (ContactWho _ fn ln _ _ _), _hc_where = ou} ) =
......
......@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE TemplateHaskell #-}
......@@ -51,7 +52,9 @@ type FlowCmdM env err m =
, MonadLogger m
)
type FlowCorpus a = ( UniqParameters a
type FlowCorpus a = ( AddUniqId a
, UniqId a
, UniqParameters a
, InsertDb a
, ExtractNgramsT a
, HasText a
......
......@@ -16,6 +16,7 @@ Portability : POSIX
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
......@@ -31,7 +32,7 @@ import Gargantext.API.GraphQL.Utils qualified as GAGU
import Gargantext.Core.Text (HasText(..))
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Prelude
import Gargantext.Utils.UTCTime ( NUTCTime(..) )
import Gargantext.Utils.UTCTime
--------------------------------------------------------------------------------
data HyperdataContact =
......@@ -41,6 +42,8 @@ data HyperdataContact =
, _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text -- TODO UTCTime
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
instance GQLType HyperdataContact where
......@@ -58,7 +61,9 @@ defaultHyperdataContact =
, _hc_where = [defaultContactWhere]
, _hc_title =Just "Title"
, _hc_source = Just "Source"
, _hc_lastValidation = Just "TODO lastValidation date" }
, _hc_lastValidation = Just "TODO lastValidation date"
, _hc_uniqIdBdd = Just "DO NOT expose this"
, _hc_uniqId = Just "DO NOT expose this" }
hyperdataContact :: FirstName -> LastName -> HyperdataContact
hyperdataContact fn ln =
......@@ -68,7 +73,9 @@ hyperdataContact fn ln =
, _hc_where = []
, _hc_title = Nothing
, _hc_source = Nothing
, _hc_lastValidation = Nothing }
, _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
-- TOD0 contact metadata (Type is too flat)
data ContactMetaData =
......@@ -87,7 +94,9 @@ arbitraryHyperdataContact =
, _hc_where = []
, _hc_title = Nothing
, _hc_source = Nothing
, _hc_lastValidation = Nothing }
, _hc_lastValidation = Nothing
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
data ContactWho =
......@@ -179,7 +188,7 @@ instance ToSchema ContactMetaData where
-- | Arbitrary instances
instance Arbitrary HyperdataContact where
arbitrary = elements [ HyperdataContact Nothing Nothing [] Nothing Nothing Nothing ]
arbitrary = elements [HyperdataContact Nothing Nothing [] Nothing Nothing Nothing Nothing Nothing]
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact
......
......@@ -29,6 +29,8 @@ import Gargantext.Database.Admin.Types.Hyperdata.Prelude
data HyperdataDocument = HyperdataDocument { _hd_bdd :: !(Maybe Text)
, _hd_doi :: !(Maybe Text)
, _hd_url :: !(Maybe Text)
, _hd_uniqId :: !(Maybe Text)
, _hd_uniqIdBdd :: !(Maybe Text)
, _hd_page :: !(Maybe Int)
, _hd_title :: !(Maybe Text)
, _hd_authors :: !(Maybe Text)
......@@ -56,7 +58,7 @@ instance HasText HyperdataDocument
defaultHyperdataDocument :: HyperdataDocument
defaultHyperdataDocument = case decode docExample of
Just hp -> hp
Nothing -> HyperdataDocument Nothing Nothing
Nothing -> HyperdataDocument Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
......@@ -105,8 +107,7 @@ instance ToHyperdataDocument HyperdataDocument
------------------------------------------------------------------------
instance Eq HyperdataDocument where
(==) h1 h2 = _hd_title h1 == _hd_title h2
&& _hd_abstract h1 == _hd_abstract h2
(==) h1 h2 = (==) (_hd_uniqId h1) (_hd_uniqId h2)
------------------------------------------------------------------------
instance Ord HyperdataDocument where
......@@ -125,7 +126,7 @@ arbitraryHyperdataDocuments =
] :: [(Text, Text)])
where
toHyperdataDocument' (t1,t2) =
HyperdataDocument Nothing Nothing Nothing Nothing (Just t1)
HyperdataDocument Nothing Nothing Nothing Nothing Nothing Nothing (Just t1)
Nothing Nothing (Just t2) Nothing Nothing Nothing Nothing Nothing
Nothing Nothing Nothing Nothing
......
......@@ -57,12 +57,15 @@ the concatenation of the parameters defined by @shaParameters@.
module Gargantext.Database.Query.Table.Node.Document.Insert
where
import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, ToJSON)
import Data.Text qualified as DT (pack, concat, take, filter, toLower)
import Data.Time.Segment (jour)
import Database.PostgreSQL.Simple (FromRow, Query, Only(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ ( sql )
import Database.PostgreSQL.Simple.SqlQQ
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core (HasDBid(toDBid))
......@@ -90,7 +93,7 @@ import Database.PostgreSQL.Simple (formatQuery)
insertDb :: (InsertDb a, HasDBid NodeType) => UserId -> Maybe ParentId -> [a] -> DBCmd err [ReturnId]
insertDb u p = runPGSQuery queryInsert . Only . Values fields . map (insertDb' u p)
where
fields = map (QualifiedIdentifier Nothing) inputSqlTypes
fields = map (\t-> QualifiedIdentifier Nothing t) inputSqlTypes
class InsertDb a
where
......@@ -105,18 +108,18 @@ instance InsertDb HyperdataDocument
, toField p
, toField $ maybe "No Title" (DT.take 255) (_hd_title h)
, toField $ _hd_publication_date h -- TODO USE UTCTime
-- , (toField . toJSON) (addUniqId h)
, (toField . toJSON) (addUniqId h)
]
instance InsertDb HyperdataContact
where
insertDb' u p _h = [ toField ("" :: Text)
insertDb' u p h = [ toField ("" :: Text)
, toField $ toDBid NodeContact
, toField u
, toField p
, toField $ maybe "Contact" (DT.take 255) (Just "Name") -- (_hc_name h)
, toField $ jour 0 1 1 -- TODO put default date
-- , (toField . toJSON) (addUniqId h)
, (toField . toJSON) (addUniqId h)
]
instance ToJSON a => InsertDb (Node a)
......@@ -191,73 +194,73 @@ class AddUniqId a
where
addUniqId :: a -> a
-- instance AddUniqId HyperdataDocument
-- where
-- addUniqId = addUniqIdsDoc
-- where
-- addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
-- addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
-- $ set hd_uniqId (Just shaUni) doc
-- where
-- shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
-- shaBdd = hash $ DT.concat $ map ($ doc) ([maybeText . _hd_bdd] <> shaParametersDoc)
-- shaParametersDoc :: [HyperdataDocument -> Text]
-- shaParametersDoc = [ filterText . maybeText . _hd_title
-- , filterText . maybeText . _hd_abstract
-- , filterText . maybeText . _hd_source
-- -- , \d -> maybeText (_hd_publication_date d)
-- ]
class UniqParameters a
where
uniqParameters :: a -> Text
uniqParameters :: ParentId -> a -> Text
instance AddUniqId HyperdataDocument
where
addUniqId = addUniqIdsDoc
where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hd_uniqIdBdd (Just shaBdd)
$ set hd_uniqId (Just shaUni) doc
where
shaUni = hash $ DT.concat $ map ($ doc) shaParametersDoc
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
-- , \d -> maybeText (_hd_publication_date d)
]
instance UniqParameters HyperdataDocument
where
uniqParameters h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
uniqParameters _ h = filterText $ DT.concat $ map maybeText $ [_hd_title h, _hd_abstract h, _hd_source h]
instance UniqParameters HyperdataContact
where
uniqParameters _ = ""
uniqParameters _ _ = ""
instance UniqParameters (Node a)
where
uniqParameters _ = undefined
uniqParameters _ _ = undefined
filterText :: Text -> Text
filterText = DT.toLower . DT.filter isAlphaNum
filterText = DT.toLower . (DT.filter isAlphaNum)
instance (UniqParameters a, ToJSON a, HasDBid NodeType) => AddUniqId (Node a)
where
addUniqId (Node nid _ t u p n d h) = Node nid (Just newHash) t u p n d h
where
newHash = "\\x" <> hash (uniqParameters h)
newHash = "\\x" <> (hash $ uniqParameters (fromMaybe 0 p) h)
---------------------------------------------------------------------------
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
-- instance AddUniqId HyperdataContact
-- where
-- addUniqId = addUniqIdsContact
instance AddUniqId HyperdataContact
where
addUniqId = addUniqIdsContact
-- addUniqIdsContact :: HyperdataContact -> HyperdataContact
-- addUniqIdsContact hc = set hc_uniqIdBdd (Just shaBdd)
-- $ set hc_uniqId (Just shaUni) hc
-- where
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
$ set (hc_uniqId ) (Just shaUni) hc
where
shaUni = hash $ DT.concat $ map ($ hc) shaParametersContact
shaBdd = hash $ DT.concat $ map ($ hc) ([\d -> maybeText (view hc_bdd d)] <> shaParametersContact)
-- shaBdd = hash $ DT.concat $ map ($ hc) ([maybeText . view hc_bdd] <> shaParametersContact)
-- -- | TODO add more shaparameters
-- shaParametersContact :: [HyperdataContact -> Text]
-- shaParametersContact = [ maybeText . view (hc_who . _Just . cw_firstName )
-- , maybeText . view (hc_who . _Just . cw_lastName )
-- | TODO add more shaparameters
shaParametersContact :: [(HyperdataContact -> Text)]
shaParametersContact = [ \d -> maybeText $ view (hc_who . _Just . cw_firstName ) d
, \d -> maybeText $ view (hc_who . _Just . cw_lastName ) d
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
maybeText :: Maybe Text -> Text
......@@ -283,7 +286,7 @@ instance ToNode HyperdataDocument where
-- TODO better Node
instance ToNode HyperdataContact where
toNode u p = Node 0 Nothing (toDBid NodeContact) u p "Contact" date
toNode u p h = Node 0 Nothing (toDBid NodeContact) u p "Contact" date h
where
date = jour 2020 01 01
......
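The `Insert` hunks above restore the `AddUniqId` / `UniqParameters` machinery: a document's `_hd_uniqId` is a hash of its normalised title, abstract and source (non-alphanumerics dropped, lower-cased), and the node hash reuses the same parameters. A minimal, self-contained sketch of that derivation, with a simplified record and a `hashText` stand-in for the project's SHA helper:

{-# LANGUAGE OverloadedStrings #-}
import Data.Char (isAlphaNum)
import Data.Hashable (hash)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T

-- Simplified document: only the fields the uniq id is derived from.
data Doc = Doc { docTitle    :: Maybe Text
               , docAbstract :: Maybe Text
               , docSource   :: Maybe Text }

-- Same normalisation as the restored `filterText`: keep alphanumerics, lowercase.
filterText :: Text -> Text
filterText = T.toLower . T.filter isAlphaNum

-- Stand-in for the project's SHA-256 helper (Text -> Text).
hashText :: Text -> Text
hashText = T.pack . show . hash

-- Sketch of `uniqParameters` + `addUniqId`: hash the normalised concatenation
-- of title, abstract and source.
uniqId :: Doc -> Text
uniqId d = hashText . filterText . T.concat $
             map (fromMaybe "") [docTitle d, docAbstract d, docSource d]

main :: IO ()
main = print $ uniqId (Doc (Just "Haskell for OCaml programmers")
                           (Just "An introduction to Haskell")
                           (Just "Arxiv"))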
......@@ -65,6 +65,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":""
, "uniqId": "1405.3072v3"
, "bdd": "Arxiv"
, "publication_day":6
, "language_iso2":"EN"
......@@ -89,6 +90,7 @@ exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
, "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers"
, "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss, Herbert Ballerina"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": ""
......@@ -106,6 +108,7 @@ exampleDocument_04 = either error id $ parseEither parseJSON $ [aesonQQ|
, "url": "http://arxiv.org/pdf/1407.5670v1"
, "title": "Rust for functional programmers"
, "source": ""
, "uniqId": "1407.5670v1"
, "authors": "Raphael Poss"
, "abstract": " This article provides an introduction to Rust , a systems language by Mozilla , to programmers already familiar with Haskell , OCaml or other functional languages. " , "institutes": ""
, "language_iso2": "EN"
......