Commit dc52c519 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 151-dev-pubmed-api-key

parents c38275d0 93e711b1
## Version 0.0.6.8.5.1
* [BACK][FIX] Indexing issue: taking all terms instead of longest of terms in case of ngrams included in others
* [FRONT][FIX][Disconnection of instance causes a blank page (#464)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/464)
* [BACK][FIX] ArXiv search in Abstracts by default
## Version 0.0.6.8.5 ## Version 0.0.6.8.5
* [BACK][FIX][Ngrams Table, page sort / limit (#149)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/149) * [BACK][FIX][Ngrams Table, page sort / limit (#149)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/149)
* [FRONT][FIX][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452) * [FRONT][FIX][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
...@@ -9,6 +15,7 @@ ...@@ -9,6 +15,7 @@
## Version 0.0.6.8.3 ## Version 0.0.6.8.3
* [INFO][UPGRADE] To upgrade use the 0.0.6.8.3.sql script
* [FRONT][SECU][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452) * [FRONT][SECU][Security Issue with Teams (#452)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/452)
* [FRONT][FEAT][Graph Explorer: disable controls when ForceAtlas is running (#451)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/451) * [FRONT][FEAT][Graph Explorer: disable controls when ForceAtlas is running (#451)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/451)
* [FRONT][FIX][[Doc date filter] Filter by multiple dates (#450)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/450) * [FRONT][FIX][[Doc date filter] Filter by multiple dates (#450)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/450)
......
...@@ -196,6 +196,8 @@ To build documentation, run: ...@@ -196,6 +196,8 @@ To build documentation, run:
stack --docker build --haddock --no-haddock-deps --fast stack --docker build --haddock --no-haddock-deps --fast
``` ```
(in `.stack-work/dist/x86_64-linux-nix/Cabal-3.2.1.0/doc/html/gargantext`).
## GraphQL ## GraphQL
Some introspection information. Some introspection information.
......
...@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init ...@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact)) import Gargantext.Database.Admin.Types.Node (NodeType(NodeDocument, NodeContact))
import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery) import Gargantext.Database.Prelude (Cmd'', Cmd, execPGSQuery)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig) import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import Prelude (getLine) import Prelude (getLine)
...@@ -67,7 +66,7 @@ main = do ...@@ -67,7 +66,7 @@ main = do
_ok <- getLine _ok <- getLine
cfg <- readConfig iniPath cfg <- readConfig iniPath
let secret = _gc_secretkey cfg let _secret = _gc_secretkey cfg
withDevEnv iniPath $ \env -> do withDevEnv iniPath $ \env -> do
-- First upgrade the Database Schema -- First upgrade the Database Schema
......
...@@ -21,10 +21,11 @@ services: ...@@ -21,10 +21,11 @@ services:
postgres: postgres:
#image: 'postgres:latest' #image: 'postgres:latest'
image: 'postgres:14' image: 'postgres:14'
shm_size: 1g # https://stackoverflow.com/a/56754077
network_mode: host network_mode: host
#command: ["postgres", "-c", "log_statement=all"] #command: ["postgres", "-c", "log_statement=all"]
#ports: #ports:
#- 5432:5432 # - 5432:5432
environment: environment:
POSTGRES_USER: gargantua POSTGRES_USER: gargantua
POSTGRES_PASSWORD: C8kdcUrAQy66U POSTGRES_PASSWORD: C8kdcUrAQy66U
......
...@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams ( ...@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE, ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER , ngrams_type INTEGER ,
weight double precision, weight double precision,
doc_count INTEGER DEFAULT 0,
PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type) PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
); );
ALTER TABLE public.context_node_ngrams OWNER TO gargantua; ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
......
ALTER TABLE context_node_ngrams
ADD COLUMN doc_count INTEGER DEFAULT 0;
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.8.5 version: 0.0.6.8.5.1
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -50,6 +50,7 @@ library ...@@ -50,6 +50,7 @@ library
Gargantext.Core.Types Gargantext.Core.Types
Gargantext.Core.Types.Individu Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main Gargantext.Core.Types.Main
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix Gargantext.Core.Utils.Prefix
Gargantext.Utils.Jobs Gargantext.Utils.Jobs
Gargantext.Utils.Jobs.API Gargantext.Utils.Jobs.API
...@@ -87,6 +88,7 @@ library ...@@ -87,6 +88,7 @@ library
Gargantext.Core.Text.Prepare Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms Gargantext.Core.Text.Terms
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Multi.Lang.En Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr Gargantext.Core.Text.Terms.Multi.Lang.Fr
...@@ -211,7 +213,6 @@ library ...@@ -211,7 +213,6 @@ library
Gargantext.Core.Text.Samples.EN Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.FR Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.SP Gargantext.Core.Text.Samples.SP
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono.Stem Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Stem.En Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Token Gargantext.Core.Text.Terms.Mono.Token
...@@ -221,7 +222,6 @@ library ...@@ -221,7 +222,6 @@ library
Gargantext.Core.Text.Terms.Multi.PosTagging Gargantext.Core.Text.Terms.Multi.PosTagging
Gargantext.Core.Text.Terms.Multi.PosTagging.Types Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Types.Phylo Gargantext.Core.Types.Phylo
Gargantext.Core.Utils
Gargantext.Core.Utils.DateUtils Gargantext.Core.Utils.DateUtils
Gargantext.Core.Viz Gargantext.Core.Viz
Gargantext.Core.Viz.Chart Gargantext.Core.Viz.Chart
...@@ -809,6 +809,7 @@ test-suite garg-test ...@@ -809,6 +809,7 @@ test-suite garg-test
Core.Text Core.Text
Core.Text.Examples Core.Text.Examples
Core.Text.Flow Core.Text.Flow
Core.Utils
Graph.Clustering Graph.Clustering
Graph.Distance Graph.Distance
Ngrams.Lang Ngrams.Lang
......
...@@ -6,7 +6,7 @@ name: gargantext ...@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions # | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes # | | | | +--- Layers * : New versions without API breaking changes
# | | | | | # | | | | |
version: '0.0.6.8.5' version: '0.0.6.8.5.1'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -74,6 +74,7 @@ library: ...@@ -74,6 +74,7 @@ library:
- Gargantext.Core.Types - Gargantext.Core.Types
- Gargantext.Core.Types.Individu - Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main - Gargantext.Core.Types.Main
- Gargantext.Core.Utils
- Gargantext.Core.Utils.Prefix - Gargantext.Core.Utils.Prefix
- Gargantext.Utils.Jobs - Gargantext.Utils.Jobs
- Gargantext.Utils.Jobs.API - Gargantext.Utils.Jobs.API
...@@ -111,6 +112,7 @@ library: ...@@ -111,6 +112,7 @@ library:
- Gargantext.Core.Text.Prepare - Gargantext.Core.Text.Prepare
- Gargantext.Core.Text.Search - Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms - Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Eleve
- Gargantext.Core.Text.Terms.Mono - Gargantext.Core.Text.Terms.Mono
- Gargantext.Core.Text.Terms.Multi.Lang.En - Gargantext.Core.Text.Terms.Multi.Lang.En
- Gargantext.Core.Text.Terms.Multi.Lang.Fr - Gargantext.Core.Text.Terms.Multi.Lang.Fr
......
{-| {-|
Module : Graph.Clustering Module : Graph.Clustering
Description : Basic tests to avoid quick regression Description : Basic tests to avoid quick regression
......
{-|
Module : Core.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Core.Utils where
import Test.Hspec
import Gargantext.Prelude
import Gargantext.Core.Utils
-- | Core.Utils tests
test :: IO ()
test = hspec $ do
describe "check if groupWithCounts works" $ do
it "simple integer array" $ do
(groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)]
it "string" $ do
(groupWithCounts "abccba") `shouldBe` [('a', 2), ('b', 2), ('c', 2)]
{-| {-|
Module : Graph.Clustering Module : Graph.Clustering
Description : Basic tests to avoid quick regression Description : Basic tests to avoid quick regression
...@@ -40,4 +39,3 @@ test = hspec $ do ...@@ -40,4 +39,3 @@ test = hspec $ do
let let
result = List.length partitions > 1 result = List.length partitions > 1
shouldBe True result shouldBe True result
...@@ -11,6 +11,8 @@ Portability : POSIX ...@@ -11,6 +11,8 @@ Portability : POSIX
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import qualified Core.Utils as Utils
--import qualified Ngrams.Lang.Fr as Fr --import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang --import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ import qualified Ngrams.Lang.Occurrences as Occ
...@@ -22,11 +24,12 @@ import qualified Utils.Crypto as Crypto ...@@ -22,11 +24,12 @@ import qualified Utils.Crypto as Crypto
main :: IO () main :: IO ()
main = do main = do
Utils.test
-- Occ.parsersTest -- Occ.parsersTest
-- Lang.ngramsExtractionTest FR -- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN -- Lang.ngramsExtractionTest EN
-- Metrics.main -- Metrics.main
Graph.test Graph.test
PD.testFromRFC3339 PD.testFromRFC3339
-- GD.test -- GD.test
Crypto.test Crypto.test
{-| {-|
Module : Utils.Crypto Module : Utils.Crypto
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -43,4 +42,3 @@ test = hspec $ do ...@@ -43,4 +42,3 @@ test = hspec $ do
let hash2 = hash (["b","a"] :: [Text]) let hash2 = hash (["b","a"] :: [Text])
it "compare" $ do it "compare" $ do
hash1 `shouldBe` hash2 hash1 `shouldBe` hash2
...@@ -187,9 +187,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env ) ...@@ -187,9 +187,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
saveNodeStory = do saveNodeStory = do
saver <- view hasNodeStorySaver saver <- view hasNodeStorySaver
liftBase $ do liftBase $ do
Gargantext.Prelude.putStrLn "---- Running node story saver ----" --Gargantext.Prelude.putStrLn "---- Running node story saver ----"
saver saver
Gargantext.Prelude.putStrLn "---- Node story saver finished ----" --Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env ) saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmediateSaver env )
...@@ -197,9 +197,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed ...@@ -197,9 +197,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
saveNodeStoryImmediate = do saveNodeStoryImmediate = do
saver <- view hasNodeStoryImmediateSaver saver <- view hasNodeStoryImmediateSaver
liftBase $ do liftBase $ do
Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----" --Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
saver saver
Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----" --Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution :: ListType -> ListType -> ListType listTypeConflictResolution :: ListType -> ListType -> ListType
......
...@@ -130,26 +130,23 @@ reIndexWith :: ( HasNodeStory env err m ...@@ -130,26 +130,23 @@ reIndexWith :: ( HasNodeStory env err m
-> Set ListType -> Set ListType
-> m () -> m ()
reIndexWith cId lId nt lts = do reIndexWith cId lId nt lts = do
printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
-- Getting [NgramsTerm] -- Getting [NgramsTerm]
ts <- List.concat ts <- List.concat
<$> map (\(k,vs) -> k:vs) <$> map (\(k,vs) -> k:vs)
<$> HashMap.toList <$> HashMap.toList
<$> getTermsWith identity [lId] nt lts <$> getTermsWith identity [lId] nt lts
-- printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
-- occs <- getOccByNgramsOnlyFast' cId lId nt ts
-- printDebug "occs" occs
let orphans = ts {- List.concat let orphans = ts {- List.concat
$ map (\t -> case HashMap.lookup t occs of $ map (\t -> case HashMap.lookup t occs of
Nothing -> [t] Nothing -> [t]
Just n -> if n <= 1 then [t] else [ ] Just n -> if n <= 1 then [t] else [ ]
) ts ) ts
-} -}
-- printDebug "orphans" orphans
printDebug "orphans" orphans
-- Get all documents of the corpus -- Get all documents of the corpus
docs <- selectDocNodes cId docs <- selectDocNodes cId
...@@ -158,24 +155,25 @@ reIndexWith cId lId nt lts = do ...@@ -158,24 +155,25 @@ reIndexWith cId lId nt lts = do
-- Checking Text documents where orphans match -- Checking Text documents where orphans match
-- TODO Tests here -- TODO Tests here
let let
ngramsByDoc = map (HashMap.fromListWith (<>)) -- fromListWith (<>)
$ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v))) ngramsByDoc = map (HashMap.fromList)
$ map (\doc -> List.zip $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans) $ map (\doc -> List.zip
$ Text.unlines $ catMaybes (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
[ doc ^. context_hyperdata . hd_title $ Text.unlines $ catMaybes
, doc ^. context_hyperdata . hd_abstract [ doc ^. context_hyperdata . hd_title
] , doc ^. context_hyperdata . hd_abstract
]
) )
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]]) (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs ) docs
-- printDebug "ngramsByDoc" ngramsByDoc printDebug "ngramsByDoc: " ngramsByDoc
-- Saving the indexation in database -- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
pure () -- ngramsByDoc pure ()
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams) toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n toIndexedNgrams m t = Indexed <$> i <*> n
......
...@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m = ...@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m =
listToCombi identity ks listToCombi identity ks
] ]
where where
ks = HM.keys m ks = HM.keys m
-- TODO k could be either k1 or k2 here -- TODO k could be either k1 or k2 here
...@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) = ...@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
-- TODO check optim -- TODO check optim
-- listToCombi identity ks1 -- listToCombi identity ks1
] ]
where where
ks1 = HM.keys m1 ks1 = HM.keys m1
ks2 = HM.keys m2 ks2 = HM.keys m2
......
...@@ -29,6 +29,7 @@ import Control.Monad.Reader (ReaderT) ...@@ -29,6 +29,7 @@ import Control.Monad.Reader (ReaderT)
import Control.Monad.Error.Class (MonadError(..)) import Control.Monad.Error.Class (MonadError(..))
import Crypto.JOSE.Error as Jose import Crypto.JOSE.Error as Jose
import Data.Aeson.Types import Data.Aeson.Types
import qualified Data.Text as Text
import Data.Typeable import Data.Typeable
import Data.Validity import Data.Validity
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
...@@ -115,7 +116,7 @@ data GargError ...@@ -115,7 +116,7 @@ data GargError
makePrisms ''GargError makePrisms ''GargError
instance ToJSON GargError where instance ToJSON GargError where
toJSON _ = String "SomeGargErrorPleaseReport" toJSON err = object [("error", String $ Text.pack $ show err)]
instance Exception GargError instance Exception GargError
......
...@@ -289,7 +289,7 @@ addCorpusWithForm user cid = ...@@ -289,7 +289,7 @@ addCorpusWithForm user cid =
serveJobsAPI AddCorpusFormJob $ \i log' -> serveJobsAPI AddCorpusFormJob $ \i log' ->
let let
log'' x = do log'' x = do
printDebug "[addToCorpusWithForm] " x --printDebug "[addToCorpusWithForm] " x
liftBase $ log' x liftBase $ log' x
in New.addToCorpusWithForm user cid i log'' (jobLogInit 3) in New.addToCorpusWithForm user cid i log'' (jobLogInit 3)
......
...@@ -18,6 +18,7 @@ module Gargantext.API.Search ...@@ -18,6 +18,7 @@ module Gargantext.API.Search
where where
import Data.Aeson hiding (defaultTaggedObject) import Data.Aeson hiding (defaultTaggedObject)
-- import Data.List (concat)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Swagger hiding (fieldLabelModifier, Contact) import Data.Swagger hiding (fieldLabelModifier, Contact)
import Data.Text (Text) import Data.Text (Text)
...@@ -56,6 +57,7 @@ api nId (SearchQuery q SearchDoc) o l order = ...@@ -56,6 +57,7 @@ api nId (SearchQuery q SearchDoc) o l order =
SearchResult <$> SearchResultDoc SearchResult <$> SearchResultDoc
<$> map (toRow nId) <$> map (toRow nId)
<$> searchInCorpus nId False q o l order <$> searchInCorpus nId False q o l order
-- <$> searchInCorpus nId False (concat q) o l order
api nId (SearchQuery q SearchContact) o l order = do api nId (SearchQuery q SearchContact) o l order = do
printDebug "isPairedWith" nId printDebug "isPairedWith" nId
...@@ -69,11 +71,13 @@ api nId (SearchQuery q SearchContact) o l order = do ...@@ -69,11 +71,13 @@ api nId (SearchQuery q SearchContact) o l order = do
<$> map (toRow aId) <$> map (toRow aId)
<$> searchInCorpusWithContacts nId aId q o l order <$> searchInCorpusWithContacts nId aId q o l order
api _nId (SearchQuery _q SearchDocWithNgrams) _o _l _order = undefined
----------------------------------------------------------------------- -----------------------------------------------------------------------
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- | Main Types -- | Main Types
----------------------------------------------------------------------- -----------------------------------------------------------------------
data SearchType = SearchDoc | SearchContact data SearchType = SearchDoc | SearchContact | SearchDocWithNgrams
deriving (Generic) deriving (Generic)
instance FromJSON SearchType where instance FromJSON SearchType where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = ObjectWithSingleField }) parseJSON = genericParseJSON (defaultOptions { sumEncoding = ObjectWithSingleField })
...@@ -132,7 +136,7 @@ instance FromJSON SearchResultTypes where ...@@ -132,7 +136,7 @@ instance FromJSON SearchResultTypes where
parseJSON = genericParseJSON (defaultOptions { sumEncoding = defaultTaggedObject }) parseJSON = genericParseJSON (defaultOptions { sumEncoding = defaultTaggedObject })
instance ToJSON SearchResultTypes where instance ToJSON SearchResultTypes where
toJSON = genericToJSON (defaultOptions { sumEncoding = defaultTaggedObject }) toJSON = genericToJSON (defaultOptions { sumEncoding = defaultTaggedObject })
instance Arbitrary SearchResultTypes where instance Arbitrary SearchResultTypes where
arbitrary = do arbitrary = do
srd <- SearchResultDoc <$> arbitrary srd <- SearchResultDoc <$> arbitrary
...@@ -163,7 +167,7 @@ data Row = ...@@ -163,7 +167,7 @@ data Row =
deriving (Generic) deriving (Generic)
instance FromJSON Row instance FromJSON Row
where where
parseJSON = genericParseJSON parseJSON = genericParseJSON
( defaultOptions { sumEncoding = defaultTaggedObject } ) ( defaultOptions { sumEncoding = defaultTaggedObject } )
instance ToJSON Row instance ToJSON Row
where where
......
...@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do ...@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do
where where
query :: PGS.Query query :: PGS.Query
query = [sql| WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?) query = [sql| WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?)
INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element) INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id
|] |]
...@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do ...@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
--printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates --printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates
-- 2. Perform inserts/deletes/updates -- 2. Perform inserts/deletes/updates
printDebug "[updateNodeStory] applying insert" () --printDebug "[updateNodeStory] applying insert" ()
insertArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version insertArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
, _a_history = [] , _a_history = []
, _a_state = archiveStateFromList inserts } , _a_state = archiveStateFromList inserts }
printDebug "[updateNodeStory] insert applied" () --printDebug "[updateNodeStory] insert applied" ()
--TODO Use currentArchive ^. a_version in delete and report error --TODO Use currentArchive ^. a_version in delete and report error
-- if entries with (node_id, ngrams_type_id, ngrams_id) but -- if entries with (node_id, ngrams_type_id, ngrams_id) but
-- different version are found. -- different version are found.
deleteArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version deleteArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
, _a_history = [] , _a_history = []
, _a_state = archiveStateFromList deletes } , _a_state = archiveStateFromList deletes }
printDebug "[updateNodeStory] delete applied" () --printDebug "[updateNodeStory] delete applied" ()
updateArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version updateArchiveList c nodeId $ Archive { _a_version = newArchive ^. a_version
, _a_history = [] , _a_history = []
, _a_state = archiveStateFromList updates } , _a_state = archiveStateFromList updates }
printDebug "[updateNodeStory] update applied" () --printDebug "[updateNodeStory] update applied" ()
pure () pure ()
-- where -- where
......
...@@ -28,8 +28,9 @@ compute graph ...@@ -28,8 +28,9 @@ compute graph
-} -}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstrainedClassMethods #-} {-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Terms module Gargantext.Core.Text.Terms
where where
...@@ -47,6 +48,7 @@ import qualified Data.Set as Set ...@@ -47,6 +48,7 @@ import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Core.Text (sentences, HasText(..)) import Gargantext.Core.Text (sentences, HasText(..))
import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken) import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
import Gargantext.Core.Text.Terms.Mono (monoTerms) import Gargantext.Core.Text.Terms.Mono (monoTerms)
...@@ -70,24 +72,23 @@ data TermType lang ...@@ -70,24 +72,23 @@ data TermType lang
, _tt_model :: !(Maybe (Tries Token ())) , _tt_model :: !(Maybe (Tries Token ()))
} }
deriving (Generic) deriving (Generic)
deriving instance (Show lang) => Show (TermType lang)
makeLenses ''TermType makeLenses ''TermType
--group :: [Text] -> [Text] --group :: [Text] -> [Text]
--group = undefined --group = undefined
-- remove Stop Words -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ -- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user). -- | Sugar to extract terms from text (hidding 'mapM' from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms]) --extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms :: TermType Lang -> [Text] -> IO [[Terms]] extractTerms :: TermType Lang -> [Text] -> IO [[TermsWithCount]]
extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs extractTerms (Unsupervised {..}) xs = mapM (terms (Unsupervised { _tt_model = Just m', .. })) xs
where where
m' = case _tt_model of m' = case _tt_model of
Just m''-> m'' Just m''-> m''
Nothing -> newTries _tt_windowSize (Text.intercalate " " xs) Nothing -> newTries _tt_windowSize (Text.intercalate " " xs)
extractTerms termTypeLang xs = mapM (terms termTypeLang) xs extractTerms termTypeLang xs = mapM (terms termTypeLang) xs
...@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams } ...@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance Hashable ExtractedNgrams instance Hashable ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class ExtractNgramsT h class ExtractNgramsT h
where where
extractNgramsT :: HasText h extractNgramsT :: HasText h
=> TermType Lang => TermType Lang
-> h -> h
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
------------------------------------------------------------------------ ------------------------------------------------------------------------
enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag enrichedTerms :: Lang -> PosTagAlgo -> POS -> Terms -> NgramsPostag
enrichedTerms l pa po (Terms ng1 ng2) = enrichedTerms l pa po (Terms ng1 ng2) =
...@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) = ...@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) =
------------------------------------------------------------------------ ------------------------------------------------------------------------
cleanNgrams :: Int -> Ngrams -> Ngrams cleanNgrams :: Int -> Ngrams -> Ngrams
cleanNgrams s ng cleanNgrams s ng
| Text.length (ng ^. ngramsTerms) < s = ng | Text.length (ng ^. ngramsTerms) < s = ng
| otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms)) | otherwise = text2ngrams (Text.take s (ng ^. ngramsTerms))
...@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do ...@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do
let (s, e) = List.partition isSimpleNgrams ngs let (s, e) = List.partition isSimpleNgrams ngs
m1 <- insertNgrams (map unSimpleNgrams s) m1 <- insertNgrams (map unSimpleNgrams s)
--printDebug "others" m1 --printDebug "others" m1
m2 <- insertNgramsPostag (map unEnrichedNgrams e) m2 <- insertNgramsPostag (map unEnrichedNgrams e)
--printDebug "terms" m2 --printDebug "terms" m2
let result = HashMap.union m1 m2 let result = HashMap.union m1 m2
pure result pure result
...@@ -163,43 +165,41 @@ isSimpleNgrams (SimpleNgrams _) = True ...@@ -163,43 +165,41 @@ isSimpleNgrams (SimpleNgrams _) = True
isSimpleNgrams _ = False isSimpleNgrams _ = False
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Terms from Text -- | Terms from 'Text'
-- Mono : mono terms -- 'Mono' : mono terms
-- Multi : multi terms -- 'Multi' : multi terms
-- MonoMulti : mono and multi -- 'MonoMulti' : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet) -- TODO : multi terms should exclude mono (intersection is not empty yet)
terms :: TermType Lang -> Text -> IO [Terms] terms :: TermType Lang -> Text -> IO [TermsWithCount]
terms (Mono lang) txt = pure $ monoTerms lang txt terms (Mono lang) txt = pure $ monoTerms lang txt
terms (Multi lang) txt = multiterms lang txt terms (Multi lang) txt = multiterms lang txt
terms (MonoMulti lang) txt = terms (Multi lang) txt terms (MonoMulti lang) txt = terms (Multi lang) txt
terms (Unsupervised { .. }) txt = termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt terms (Unsupervised { .. }) txt = pure $ termsUnsupervised (Unsupervised { _tt_model = Just m', .. }) txt
where where
m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model m' = maybe (newTries _tt_ngramsSize txt) identity _tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt -- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type WindowSize = Int type WindowSize = Int
type MinNgramSize = Int type MinNgramSize = Int
termsUnsupervised :: TermType Lang -> Text -> IO [Terms] -- | Unsupervised ngrams extraction
termsUnsupervised (Unsupervised l n s m) = -- language agnostic extraction
pure -- TODO: newtype BlockText
. map (text2term l) termsUnsupervised :: TermType Lang -> Text -> [TermsWithCount]
. List.nub termsUnsupervised (Unsupervised { _tt_model = Nothing }) = panic "[termsUnsupervised] no model"
. (List.filter (\l' -> List.length l' >= s)) termsUnsupervised (Unsupervised { _tt_model = Just _tt_model, .. }) =
map (\(t, cnt) -> (text2term _tt_lang t, cnt))
. groupWithCounts
-- . List.nub
. (List.filter (\l' -> List.length l' >= _tt_windowSize))
. List.concat . List.concat
. mainEleveWith (maybe (panic "no model") identity m) n . mainEleveWith _tt_model _tt_ngramsSize
. uniText . uniText
termsUnsupervised _ = undefined termsUnsupervised _ = undefined
newTries :: Int -> Text -> Tries Token () newTries :: Int -> Text -> Tries Token ()
newTries n t = buildTries n (fmap toToken $ uniText t) newTries n t = buildTries n (fmap toToken $ uniText t)
...@@ -217,5 +217,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt) ...@@ -217,5 +217,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation :: Text -> Bool isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure) isPunctuation x = List.elem x $ (Text.pack . pure)
<$> ("!?(),;.:" :: String) <$> ("!?(),;.:" :: String)
...@@ -32,6 +32,7 @@ Notes for current implementation: ...@@ -32,6 +32,7 @@ Notes for current implementation:
-} -}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
...@@ -278,6 +279,8 @@ data Tries k e = Tries ...@@ -278,6 +279,8 @@ data Tries k e = Tries
makeLenses ''Tries makeLenses ''Tries
deriving instance (Show k, Show e) => Show (Tries k e)
buildTries :: Int -> [[Token]] -> Tries Token () buildTries :: Int -> [[Token]] -> Tries Token ()
buildTries n sentences = Tries buildTries n sentences = Tries
{ _fwd = buildTrie Forward n sentences { _fwd = buildTrie Forward n sentences
......
...@@ -40,8 +40,8 @@ words = monoTexts ...@@ -40,8 +40,8 @@ words = monoTexts
isSep :: Char -> Bool isSep :: Char -> Bool
isSep = (`elem` (",.:;?!(){}[]\"\'" :: String)) isSep = (`elem` (",.:;?!(){}[]\"\'" :: String))
monoTerms :: Lang -> Text -> [Terms] monoTerms :: Lang -> Text -> [TermsWithCount]
monoTerms l txt = map (monoText2term l) $ monoTexts txt monoTerms l txt = map (\t -> (monoText2term l t, 1)) $ monoTexts txt
monoTexts :: Text -> [Text] monoTexts :: Text -> [Text]
monoTexts = L.concat . monoTextsBySentence monoTexts = L.concat . monoTextsBySentence
......
...@@ -21,6 +21,7 @@ import Data.List (concat) ...@@ -21,6 +21,7 @@ import Data.List (concat)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Utils (groupWithCounts)
import Gargantext.Core.Text.Terms.Multi.PosTagging import Gargantext.Core.Text.Terms.Multi.PosTagging
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
...@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP ...@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
type NLP_API = Lang -> Text -> IO PosSentences type NLP_API = Lang -> Text -> IO PosSentences
------------------------------------------------------------------- -------------------------------------------------------------------
multiterms :: Lang -> Text -> IO [Terms] multiterms :: Lang -> Text -> IO [TermsWithCount]
multiterms = multiterms' tokenTag2terms multiterms l txt = do
ret <- multiterms' tokenTag2terms l txt
pure $ groupWithCounts ret
where where
multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a] multiterms' :: (TokenTag -> a) -> Lang -> Text -> IO [a]
multiterms' f lang txt = concat multiterms' f lang txt' = concat
<$> map (map f) <$> map (map f)
<$> map (filter (\t -> _my_token_pos t == Just NP)) <$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt <$> tokenTags lang txt'
------------------------------------------------------------------- -------------------------------------------------------------------
tokenTag2terms :: TokenTag -> Terms tokenTag2terms :: TokenTag -> Terms
...@@ -57,10 +60,10 @@ tokenTags l _ = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " < ...@@ -57,10 +60,10 @@ tokenTags l _ = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]] tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
tokenTagsWith lang txt nlp = map (groupTokens lang) tokenTagsWith lang txt nlp = map (groupTokens lang)
<$> map tokens2tokensTags <$> map tokens2tokensTags
<$> map _sentenceTokens <$> map _sentenceTokens
<$> _sentences <$> _sentences
<$> nlp lang txt <$> nlp lang txt
---- | This function analyses and groups (or not) ngrams according to ---- | This function analyses and groups (or not) ngrams according to
......
...@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y) ...@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x] group2 _ _ [x@(TokenTag _ _ (Just _) _)] = [x]
group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z)) group2 p1 p2 (x@(TokenTag _ _ (Just _) _):y@(TokenTag _ _ Nothing _):z) = (x:y: group2 p1 p2 (y:z))
group2 _ _ [] = [] group2 _ _ [] = []
...@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP ...@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs) --groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
-- --
--groupNgrams (x:xs) = (x:(groupNgrams xs)) --groupNgrams (x:xs) = (x:(groupNgrams xs))
...@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag] ...@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags ts = filter' $ map tokenTag ts tokens2tokensTags ts = filter' $ map tokenTag ts
------------------------------------------------------------------------ ------------------------------------------------------------------------
tokenTag :: Token -> TokenTag tokenTag :: Token -> TokenTag
tokenTag (Token _ w _ l _ _ p n _ _) = TokenTag w' l' p n tokenTag (Token { .. }) = TokenTag { _my_token_word = w'
, _my_token_lemma = l'
, _my_token_pos = _tokenPos
, _my_token_ner = _tokenNer }
where where
w' = split w w' = split _tokenWord
l' = fromList (split l) l' = fromList (split _tokenLemma)
split = splitOn (pack " ") . toLower split = splitOn (pack " ") . toLower
filter' :: [TokenTag] -> [TokenTag] filter' :: [TokenTag] -> [TokenTag]
filter' xs = filter isNgrams xs filter' xs = filter isNgrams xs
where where
isNgrams (TokenTag _ _ p n) = isJust p || isJust n isNgrams (TokenTag { .. }) = isJust _my_token_pos || isJust _my_token_ner
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- request = -- request =
-- "fr" : { -- "fr" : {
-- "tokenize.language" : "fr", -- "tokenize.language" : "fr",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger", -- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
...@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs ...@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz", -- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.language" : "french", -- "depparse.language" : "french",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz", -- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ssplit.newlineIsSentenceBreak": "always" -- "ssplit.newlineIsSentenceBreak": "always"
-- }, -- },
-- --
corenlp' :: ( FromJSON a corenlp' :: ( FromJSON a
, ConvertibleStrings p ByteString , ConvertibleStrings p ByteString
...@@ -80,6 +83,7 @@ corenlp' lang txt = do ...@@ -80,6 +83,7 @@ corenlp' lang txt = do
FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}" FR -> "{\"annotators\": \"tokenize,ssplit,pos,lemma,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
_ -> panic $ pack "not implemented yet" _ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
-- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
let request = setRequestBodyLBS (cs txt) url let request = setRequestBodyLBS (cs txt) url
httpJSON request httpJSON request
......
...@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]} ...@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
deriving (Show, Generic) deriving (Show, Generic)
$(deriveJSON (unPrefix "_") ''PosSentences) $(deriveJSON (unPrefix "_") ''PosSentences)
{-| {-|
Module : Gargantext.Core.Text.Terms.WithList Module : Gargantext.Core.Text.Terms.WithList
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords) ...@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence) import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Utils (groupWithCounts)
import Prelude (error) import Prelude (error)
import qualified Data.Algorithms.KMP as KMP import qualified Data.Algorithms.KMP as KMP
import qualified Data.IntMap.Strict as IntMap import qualified Data.IntMap.Strict as IntMap
...@@ -35,8 +37,11 @@ data Pattern = Pattern ...@@ -35,8 +37,11 @@ data Pattern = Pattern
type Patterns = [Pattern] type Patterns = [Pattern]
------------------------------------------------------------------------ ------------------------------------------------------------------------
replaceTerms :: Patterns -> [Text] -> [[Text]]
replaceTerms pats terms = go 0 data ReplaceTerms = KeepAll | LongestOnly
replaceTerms :: ReplaceTerms -> Patterns -> [Text] -> [[Text]]
replaceTerms rplaceTerms pats terms = go 0
where where
terms_len = length terms terms_len = length terms
...@@ -47,15 +52,17 @@ replaceTerms pats terms = go 0 ...@@ -47,15 +52,17 @@ replaceTerms pats terms = go 0
Just (len, term) -> Just (len, term) ->
term : go (ix + len) term : go (ix + len)
m = toMap
merge (len1, lab1) (len2, lab2) =
if len2 < len1 then (len1, lab1) else (len2, lab2)
m =
IntMap.fromListWith merge
[ (ix, (len, term)) [ (ix, (len, term))
| Pattern pat len term <- pats, ix <- KMP.match pat terms ] | Pattern pat len term <- pats, ix <- KMP.match pat terms ]
toMap = case rplaceTerms of
KeepAll -> IntMap.fromList
LongestOnly -> IntMap.fromListWith merge
where
merge (len1, lab1) (len2, lab2) =
if len2 < len1 then (len1, lab1) else (len2, lab2)
buildPatterns :: TermList -> Patterns buildPatterns :: TermList -> Patterns
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
where where
...@@ -71,8 +78,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern ...@@ -71,8 +78,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
-------------------------------------------------------------------------- --------------------------------------------------------------------------
-- Utils -- Utils
type MatchedText = Text type MatchedText = Text
termsInText :: Patterns -> Text -> [MatchedText] termsInText :: Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText pats txt = List.nub termsInText pats txt = groupWithCounts
$ List.concat $ List.concat
$ map (map unwords) $ map (map unwords)
$ extractTermsWithList pats txt $ extractTermsWithList pats txt
...@@ -80,14 +87,14 @@ termsInText pats txt = List.nub ...@@ -80,14 +87,14 @@ termsInText pats txt = List.nub
-------------------------------------------------------------------------- --------------------------------------------------------------------------
extractTermsWithList :: Patterns -> Text -> Corpus [Text] extractTermsWithList :: Patterns -> Text -> Corpus [Text]
extractTermsWithList pats = map (replaceTerms pats) . monoTextsBySentence extractTermsWithList pats = map (replaceTerms KeepAll pats) . monoTextsBySentence
-- | Extract terms -- | Extract terms
-- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList -- >>> let termList = [(["chat blanc"], [["chat","blanc"]])] :: TermList
-- extractTermsWithList' (buildPatterns termList) "Le chat blanc"["chat blanc"] -- extractTermsWithList' (buildPatterns termList) "Le chat blanc"["chat blanc"]
-- ["chat blanc"] -- ["chat blanc"]
extractTermsWithList' :: Patterns -> Text -> [Text] extractTermsWithList' :: Patterns -> Text -> [Text]
extractTermsWithList' pats = map (concat . map concat . replaceTerms pats) extractTermsWithList' pats = map (concat . map concat . replaceTerms KeepAll pats)
. monoTextsBySentence . monoTextsBySentence
-------------------------------------------------------------------------- --------------------------------------------------------------------------
...@@ -95,7 +102,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats) ...@@ -95,7 +102,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
{- | Not used {- | Not used
filterWith :: TermList filterWith :: TermList
-> (a -> Text) -> (a -> Text)
-> [a] -> [a]
-> [(a, [Text])] -> [(a, [Text])]
filterWith termList f xs = filterWith' termList f zip xs filterWith termList f xs = filterWith' termList f zip xs
...@@ -103,7 +110,7 @@ filterWith termList f xs = filterWith' termList f zip xs ...@@ -103,7 +110,7 @@ filterWith termList f xs = filterWith' termList f zip xs
filterWith' :: TermList filterWith' :: TermList
-> (a -> Text) -> (a -> Text)
-> ([a] -> [[Text]] -> [b]) -> ([a] -> [[Text]] -> [b])
-> [a] -> [a]
-> [b] -> [b]
filterWith' termList f f' xs = f' xs filterWith' termList f f' xs = f' xs
$ map (extractTermsWithList' pats) $ map (extractTermsWithList' pats)
......
{-| {-|
Module : Gargantext.Types Module : Gargantext.Types
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -17,7 +17,7 @@ commentary with @some markup@. ...@@ -17,7 +17,7 @@ commentary with @some markup@.
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Admin.Types.Node , module Gargantext.Database.Admin.Types.Node
, DebugMode(..), withDebugMode , DebugMode(..), withDebugMode
, Term, Terms(..) , Term, Terms(..), TermsCount, TermsWithCount
, TokenTag(..), POS(..), NER(..) , TokenTag(..), POS(..), NER(..)
, Label, Stems , Label, Stems
, HasInvalidError(..), assertValid , HasInvalidError(..), assertValid
...@@ -70,10 +70,13 @@ type Label = [Text] ...@@ -70,10 +70,13 @@ type Label = [Text]
data Terms = Terms { _terms_label :: Label data Terms = Terms { _terms_label :: Label
, _terms_stem :: Stems , _terms_stem :: Stems
} deriving (Ord, Show) } deriving (Ord, Show)
instance Eq Terms where instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2 (==) (Terms _ s1) (Terms _ s2) = s1 == s2
type TermsCount = Int
type TermsWithCount = (Terms, TermsCount)
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Tag = POS | NER data Tag = POS | NER
deriving (Show, Eq) deriving (Show, Eq)
...@@ -208,5 +211,3 @@ data TODO = TODO ...@@ -208,5 +211,3 @@ data TODO = TODO
instance ToSchema TODO where instance ToSchema TODO where
instance ToParamSchema TODO where instance ToParamSchema TODO where
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
{-| {-|
Module : Gargantext.Utils Module : Gargantext.Utils
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -12,16 +12,19 @@ commentary with @some markup@. ...@@ -12,16 +12,19 @@ commentary with @some markup@.
-} -}
module Gargantext.Core.Utils ( module Gargantext.Core.Utils (
-- module Gargantext.Utils.Chronos -- module Gargantext.Utils.Chronos
module Gargantext.Core.Utils.Prefix module Gargantext.Core.Utils.Prefix
, something , something
, alphanum , alphanum
, choices , choices
, randomString , randomString
, groupWithCounts
, addTuples
) where ) where
import Data.Char (chr, ord) import Data.Char (chr, ord)
import qualified Data.List as List
import Data.Maybe import Data.Maybe
import Data.Monoid import Data.Monoid
import Data.Text (Text, pack) import Data.Text (Text, pack)
...@@ -57,3 +60,17 @@ randomString :: Int -> IO Text ...@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
randomString num = do randomString num = do
str <- choices num alphanum str <- choices num alphanum
pure $ pack str pure $ pack str
-- | Given a list of items of type 'a', return list with unique items
-- (like List.nub) but tuple-d with their counts in the original list
groupWithCounts :: (Ord a, Eq a) => [a] -> [(a, Int)]
groupWithCounts = map f
. List.group
. List.sort
where
f [] = panic "[groupWithCounts] impossible"
f ts@(t:_) = (t, length ts)
addTuples :: (Num a, Num b) => (a, b) -> (a, b) -> (a, b)
addTuples (a1, b1) (a2, b2) = (a1 + a2, b1 + b2)
...@@ -65,7 +65,7 @@ flowPhylo cId = do ...@@ -65,7 +65,7 @@ flowPhylo cId = do
patterns = buildPatterns termList patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList -- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text]) filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y,termsInText patterns' d) filterTerms patterns' (y,d) = (y, fst <$> termsInText patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs' docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
...@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined ...@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
-- refactor 2021 -- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString -- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents -- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
...@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where where
import Conduit import Conduit
import Control.Lens ((^.), view, _Just, makeLenses) import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
import Control.Monad.Reader (MonadReader) import Control.Monad.Reader (MonadReader)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
...@@ -61,7 +61,6 @@ import Data.Maybe (catMaybes) ...@@ -61,7 +61,6 @@ import Data.Maybe (catMaybes)
import Data.Monoid import Data.Monoid
import Data.Swagger import Data.Swagger
import qualified Data.Text as T import qualified Data.Text as T
import Data.Traversable (traverse)
import Data.Tuple.Extra (first, second) import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant.Client (ClientError) import Servant.Client (ClientError)
...@@ -84,9 +83,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(. ...@@ -84,9 +83,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
import Gargantext.Core.Text.List.Social (FlowSocialListWith) import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Gargantext.Core.Types (POS(NP)) import Gargantext.Core.Types (POS(NP), TermsCount)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Core.Utils (addTuples)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow.List import Gargantext.Database.Action.Flow.List
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
...@@ -362,25 +362,27 @@ insertMasterDocs c lang hs = do ...@@ -362,25 +362,27 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring -- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int)) -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int)) mapNgramsDocs' :: HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
<- mapNodeIdNgrams <- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
(extractNgramsT $ withLang lang documentsWithId) (extractNgramsT $ withLang lang documentsWithId)
documentsWithId documentsWithId
lId <- getOrMkList masterCorpusId masterUserId lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs' _ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId -- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure ids' pure ids'
saveDocNgramsWith :: ( FlowCmdM env err m) saveDocNgramsWith :: (FlowCmdM env err m)
=> ListId => ListId
-> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int)) -> HashMap ExtractedNgrams (Map NgramsType (Map NodeId (Int, TermsCount)))
-> m () -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' --printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
--printDebug "terms2id" terms2id let mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocsNoCount
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs' let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
...@@ -397,7 +399,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -397,7 +399,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
<*> Just (fromIntegral w :: Double) <*> Just (fromIntegral w :: Double)
| (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs | (terms'', mapNgramsTypes) <- HashMap.toList mapNgramsDocs
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight , (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
] ]
-- to be removed -- to be removed
...@@ -456,14 +458,14 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList ...@@ -456,14 +458,14 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
------------------------------------------------------------------------ ------------------------------------------------------------------------
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: HasNodeError err
=> (a => (a
-> Cmd err (HashMap b (Map NgramsType Int))) -> Cmd err (HashMap b (Map NgramsType Int, TermsCount)))
-> [Indexed NodeId a] -> [Indexed NodeId a]
-> Cmd err [DocumentIdWithNgrams a b] -> Cmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where where
toDocumentIdWithNgrams d = do toDocumentIdWithNgrams d = do
e <- f $ _unIndex d e <- f $ _unIndex d
pure $ DocumentIdWithNgrams d e pure $ DocumentIdWithNgrams d e
-- | TODO check optimization -- | TODO check optimization
...@@ -471,13 +473,17 @@ mapNodeIdNgrams :: (Ord b, Hashable b) ...@@ -471,13 +473,17 @@ mapNodeIdNgrams :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b] => [DocumentIdWithNgrams a b]
-> HashMap b -> HashMap b
(Map NgramsType (Map NgramsType
(Map NodeId Int) (Map NodeId (Int, TermsCount))
) )
mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith addTuples)) . fmap f
where where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b f :: DocumentIdWithNgrams a b
-> HashMap b (Map NgramsType (Map NodeId Int)) -> HashMap b (Map NgramsType (Map NodeId (Int, TermsCount)))
f d = fmap (fmap (Map.singleton nId)) $ documentNgrams d f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\i -> Map.singleton nId (i, cnt)) ngramsTypeMap) $ documentNgrams d
where where
nId = _index $ documentWithId d nId = _index $ documentWithId d
...@@ -488,25 +494,25 @@ instance ExtractNgramsT HyperdataContact ...@@ -488,25 +494,25 @@ instance ExtractNgramsT HyperdataContact
extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc extractNgramsT l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
where where
extract :: TermType Lang -> HyperdataContact extract :: TermType Lang -> HyperdataContact
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extract _l hc' = do extract _l hc' = do
let authors = map text2ngrams let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a]) $ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc' $ view (hc_who . _Just . cw_lastName) hc'
pure $ HashMap.fromList $ [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ] pure $ HashMap.fromList $ [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]
instance ExtractNgramsT HyperdataDocument instance ExtractNgramsT HyperdataDocument
where where
extractNgramsT :: TermType Lang extractNgramsT :: TermType Lang
-> HyperdataDocument -> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd extractNgramsT lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' lang hd
where where
extractNgramsT' :: TermType Lang extractNgramsT' :: TermType Lang
-> HyperdataDocument -> HyperdataDocument
-> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int)) -> Cmd err (HashMap ExtractedNgrams (Map NgramsType Int, TermsCount))
extractNgramsT' lang' doc = do extractNgramsT' lang' doc = do
let source = text2ngrams let source = text2ngrams
$ maybe "Nothing" identity $ maybe "Nothing" identity
...@@ -520,23 +526,23 @@ instance ExtractNgramsT HyperdataDocument ...@@ -520,23 +526,23 @@ instance ExtractNgramsT HyperdataDocument
$ maybe ["Nothing"] (T.splitOn ", ") $ maybe ["Nothing"] (T.splitOn ", ")
$ _hd_authors doc $ _hd_authors doc
terms' <- map (enrichedTerms (lang' ^. tt_lang) CoreNLP NP) termsWithCounts' <- map (\(t, cnt) -> (enrichedTerms (lang' ^. tt_lang) CoreNLP NP t, cnt))
<$> concat <$> concat
<$> liftBase (extractTerms lang' $ hasText doc) <$> liftBase (extractTerms lang' $ hasText doc)
pure $ HashMap.fromList pure $ HashMap.fromList
$ [(SimpleNgrams source, Map.singleton Sources 1) ] $ [(SimpleNgrams source, (Map.singleton Sources 1, 1)) ]
<> [(SimpleNgrams i', Map.singleton Institutes 1) | i' <- institutes ] <> [(SimpleNgrams i', (Map.singleton Institutes 1, 1)) | i' <- institutes ]
<> [(SimpleNgrams a', Map.singleton Authors 1) | a' <- authors ] <> [(SimpleNgrams a', (Map.singleton Authors 1, 1)) | a' <- authors ]
<> [(EnrichedNgrams t', Map.singleton NgramsTerms 1) | t' <- terms' ] <> [(EnrichedNgrams t', (Map.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]
instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a) instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
where where
extractNgramsT l (Node _ _ _ _ _ _ _ h) = extractNgramsT l h extractNgramsT l (Node { _node_hyperdata = h }) = extractNgramsT l h
instance HasText a => HasText (Node a) instance HasText a => HasText (Node a)
where where
hasText (Node _ _ _ _ _ _ _ h) = hasText h hasText (Node { _node_hyperdata = h }) = hasText h
......
...@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils ...@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils
import Data.Map (Map) import Data.Map (Map)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.ContextNodeNgrams import Gargantext.Database.Query.Table.ContextNodeNgrams
...@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap ...@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
data DocumentIdWithNgrams a b = data DocumentIdWithNgrams a b =
DocumentIdWithNgrams DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a { documentWithId :: Indexed NodeId a
, documentNgrams :: HashMap b (Map NgramsType Int) , documentNgrams :: HashMap b (Map NgramsType Int, TermsCount)
} deriving (Show) } deriving (Show)
insertDocNgrams :: ListId insertDocNgrams :: ListId
-> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId Int)) -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (Int, TermsCount)))
-> Cmd err Int -> Cmd err Int
insertDocNgrams lId m = insertContextNodeNgrams ns insertDocNgrams lId m = do
printDebug "[insertDocNgrams] ns" ns
insertContextNodeNgrams ns
where where
ns = [ ContextNodeNgrams docId lId (ng^.index) ns = [ ContextNodeNgrams docId lId (ng^.index)
(ngramsTypeId t) (ngramsTypeId t)
(fromIntegral i) (fromIntegral i)
cnt
| (ng, t2n2i) <- HashMap.toList m | (ng, t2n2i) <- HashMap.toList m
, (t, n2i) <- DM.toList t2n2i , (t, n2i) <- DM.toList t2n2i
, (docId, i) <- DM.toList n2i , (docId, (i, cnt)) <- DM.toList n2i
] ]
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
...@@ -121,6 +121,7 @@ getOccByNgramsOnlyFast cId lId nt = do ...@@ -121,6 +121,7 @@ getOccByNgramsOnlyFast cId lId nt = do
-> Cmd err [(Text, DPST.PGArray Int)] -> Cmd err [(Text, DPST.PGArray Int)]
run cId' lId' nt' = runPGSQuery query run cId' lId' nt' = runPGSQuery query
( cId' ( cId'
, cId'
, lId' , lId'
, ngramsTypeId nt' , ngramsTypeId nt'
) )
...@@ -130,7 +131,14 @@ getOccByNgramsOnlyFast cId lId nt = do ...@@ -130,7 +131,14 @@ getOccByNgramsOnlyFast cId lId nt = do
SELECT ng.terms SELECT ng.terms
-- , ng.id -- , ng.id
--, round(nng.weight) --, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = ngrams_id) AS context_ids , ARRAY(
SELECT DISTINCT context_node_ngrams.context_id
FROM context_node_ngrams
JOIN nodes_contexts
ON context_node_ngrams.context_id = nodes_contexts.context_id
WHERE ng.id = context_node_ngrams.ngrams_id
AND nodes_contexts.node_id = ?
) AS context_ids
-- , ns.version -- , ns.version
-- , nng.ngrams_type -- , nng.ngrams_type
-- , ns.ngrams_type_id -- , ns.ngrams_type_id
......
...@@ -14,7 +14,10 @@ module Gargantext.Database.Action.Search where ...@@ -14,7 +14,10 @@ module Gargantext.Database.Action.Search where
import Control.Arrow (returnA) import Control.Arrow (returnA)
import Control.Lens ((^.)) import Control.Lens ((^.))
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe import Data.Maybe
import qualified Data.Set as Set
import Data.Text (Text, unpack, intercalate) import Data.Text (Text, unpack, intercalate)
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Gargantext.Core import Gargantext.Core
...@@ -26,8 +29,11 @@ import Gargantext.Database.Query.Filter ...@@ -26,8 +29,11 @@ import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5) import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Context import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
import Gargantext.Database.Query.Table.NodeContext import Gargantext.Database.Query.Table.NodeContext
import Gargantext.Database.Query.Table.NodeContext_NodeContext import Gargantext.Database.Query.Table.NodeContext_NodeContext
import Gargantext.Database.Schema.ContextNodeNgrams (ContextNodeNgramsPoly(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
import Gargantext.Prelude import Gargantext.Prelude
...@@ -43,7 +49,7 @@ searchDocInDatabase :: HasDBid NodeType ...@@ -43,7 +49,7 @@ searchDocInDatabase :: HasDBid NodeType
-> Cmd err [(NodeId, HyperdataDocument)] -> Cmd err [(NodeId, HyperdataDocument)]
searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
where where
-- | Global search query where ParentId is Master Node Corpus Id -- | Global search query where ParentId is Master Node Corpus Id
queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb) queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb)
queryDocInDatabase _p q = proc () -> do queryDocInDatabase _p q = proc () -> do
row <- queryNodeSearchTable -< () row <- queryNodeSearchTable -< ()
...@@ -51,6 +57,74 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) ...@@ -51,6 +57,74 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument) restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
returnA -< (_ns_id row, _ns_hyperdata row) returnA -< (_ns_id row, _ns_hyperdata row)
------------------------------------------------------------------------
-- | Search ngrams in documents, ranking them by TF-IDF. We narrow our
-- search only to map/candidate terms.
searchInCorpusWithNgrams :: HasDBid NodeType
=> CorpusId
-> ListId
-> IsTrash
-> NgramsType
-> [[Text]]
-> Maybe Offset
-> Maybe Limit
-> Maybe OrderBy
-> Cmd err [FacetDoc]
searchInCorpusWithNgrams _cId _lId _t _ngt _q _o _l _order = undefined
-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
-- case only the "TF" part makes sense and so we only compute the
-- ratio of "number of times our terms appear in given document" and
-- "number of all terms in document" and return a sorted list of
-- document ids
tfidfAll :: CorpusId -> [Int] -> Cmd err [Int]
tfidfAll cId ngramIds = do
let ngramIdsSet = Set.fromList ngramIds
docsWithNgrams <- runOpaQuery (queryCorpusWithNgrams cId ngramIds) :: Cmd err [(Int, Int, Int)]
-- NOTE The query returned docs with ANY ngramIds. We need to further
-- restrict to ALL ngramIds.
let docsNgramsM =
Map.fromListWith (Set.union)
[ (ctxId, Set.singleton ngrams_id)
| (ctxId, ngrams_id, _) <- docsWithNgrams]
let docsWithAllNgramsS = Set.fromList $ List.map fst $
List.filter (\(_, docNgrams) ->
ngramIdsSet == Set.intersection ngramIdsSet docNgrams) $ Map.toList docsNgramsM
let docsWithAllNgrams =
List.filter (\(ctxId, _, _) ->
Set.member ctxId docsWithAllNgramsS) docsWithNgrams
printDebug "[tfidfAll] docsWithAllNgrams" docsWithAllNgrams
let docsWithCounts = Map.fromListWith (+) [ (ctxId, doc_count)
| (ctxId, _, doc_count) <- docsWithAllNgrams]
printDebug "[tfidfAll] docsWithCounts" docsWithCounts
let totals = [ ( ctxId
, ngrams_id
, fromIntegral doc_count :: Double
, fromIntegral (fromMaybe 0 $ Map.lookup ctxId docsWithCounts) :: Double)
| (ctxId, ngrams_id, doc_count) <- docsWithAllNgrams]
let tfidf_sorted = List.sortOn snd [(ctxId, doc_count/s)
| (ctxId, _, doc_count, s) <- totals]
pure $ List.map fst $ List.reverse tfidf_sorted
-- | Query for searching the 'context_node_ngrams' table so that we
-- find docs with ANY given 'ngramIds'.
queryCorpusWithNgrams :: CorpusId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4)
queryCorpusWithNgrams cId ngramIds = proc () -> do
row <- queryContextNodeNgramsTable -< ()
restrict -< (_cnng_node_id row) .== (pgNodeId cId)
restrict -< in_ (sqlInt4 <$> ngramIds) (_cnng_ngrams_id row)
returnA -< ( _cnng_context_id row
, _cnng_ngrams_id row
, _cnng_doc_count row)
--returnA -< row
-- returnA -< ( _cnng_context_id row
-- , _cnng_node_id row
-- , _cnng_ngrams_id row
-- , _cnng_ngramsType row
-- , _cnng_weight row
-- , _cnng_doc_count row)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | todo add limit and offset and order -- | todo add limit and offset and order
searchInCorpus :: HasDBid NodeType searchInCorpus :: HasDBid NodeType
...@@ -209,4 +283,3 @@ queryContactViaDoc = ...@@ -209,4 +283,3 @@ queryContactViaDoc =
) )
) -> Column SqlBool ) -> Column SqlBool
cond45 (doc, (corpus, (_,(_,_)))) = doc^.cs_id .== corpus^.nc_context_id cond45 (doc, (corpus, (_,(_,_)))) = doc^.cs_id .== corpus^.nc_context_id
...@@ -103,7 +103,7 @@ data Facet id created title hyperdata category ngramCount score = ...@@ -103,7 +103,7 @@ data Facet id created title hyperdata category ngramCount score =
, facetDoc_score :: score , facetDoc_score :: score
} deriving (Show, Generic) } deriving (Show, Generic)
{- | TODO after demo {- | TODO after demo
data Facet id date hyperdata score = data Facet id date hyperdata score =
FacetDoc { facetDoc_id :: id FacetDoc { facetDoc_id :: id
, facetDoc_date :: date , facetDoc_date :: date
, facetDoc_hyperdata :: hyperdata , facetDoc_hyperdata :: hyperdata
...@@ -163,7 +163,7 @@ type FacetPairedReadNull = FacetPaired (Column (Nullable SqlInt4) ) ...@@ -163,7 +163,7 @@ type FacetPairedReadNull = FacetPaired (Column (Nullable SqlInt4) )
(Column (Nullable SqlInt4) ) (Column (Nullable SqlInt4) )
type FacetPairedReadNullAgg = FacetPaired (Aggregator (Column (Nullable SqlInt4) ) type FacetPairedReadNullAgg = FacetPaired (Aggregator (Column (Nullable SqlInt4) )
(Column (Nullable SqlInt4) ) (Column (Nullable SqlInt4) )
) )
(Aggregator (Column (Nullable SqlTimestamptz)) (Aggregator (Column (Nullable SqlTimestamptz))
(Column (Nullable SqlTimestamptz)) (Column (Nullable SqlTimestamptz))
......
...@@ -43,7 +43,7 @@ import Opaleye.Internal.Join (NullMaker(..)) ...@@ -43,7 +43,7 @@ import Opaleye.Internal.Join (NullMaker(..))
import qualified Opaleye.Internal.Unpackspec() import qualified Opaleye.Internal.Unpackspec()
keepWhen :: (a -> Field SqlBool) -> SelectArr a a keepWhen :: (a -> Field SqlBool) -> SelectArr a a
keepWhen p = proc a -> do keepWhen p = proc a -> do
restrict -< p a restrict -< p a
returnA -< a returnA -< a
...@@ -61,7 +61,7 @@ leftJoin2 = leftJoin ...@@ -61,7 +61,7 @@ leftJoin2 = leftJoin
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | LeftJoin3 in two ways to write it -- | LeftJoin3 in two ways to write it
leftJoin3 :: Select columnsA -> Select columnsB -> Select columnsC leftJoin3 :: Select columnsA -> Select columnsB -> Select columnsC
-> ((columnsA, columnsB, columnsC) -> Column SqlBool) -> ((columnsA, columnsB, columnsC) -> Column SqlBool)
-> Select (columnsA, columnsB, columnsC) -> Select (columnsA, columnsB, columnsC)
leftJoin3 q1 q2 q3 cond = ((,,) <$> q1 <*> q2 <*> q3) >>> keepWhen cond leftJoin3 q1 q2 q3 cond = ((,,) <$> q1 <*> q2 <*> q3) >>> keepWhen cond
...@@ -82,7 +82,7 @@ leftJoin4' :: Select columnsA ...@@ -82,7 +82,7 @@ leftJoin4' :: Select columnsA
-> Select columnsB -> Select columnsB
-> Select columnsC -> Select columnsC
-> Select columnsD -> Select columnsD
-> ((columnsA, columnsB, columnsC, columnsD) -> Column SqlBool) -> ((columnsA, columnsB, columnsC, columnsD) -> Column SqlBool)
-> Select (columnsA, columnsB, columnsC, columnsD) -> Select (columnsA, columnsB, columnsC, columnsD)
leftJoin4' q1 q2 q3 q4 cond = ((,,,) <$> q1 <*> q2 <*> q3 <*> q4) >>> keepWhen cond leftJoin4' q1 q2 q3 q4 cond = ((,,,) <$> q1 <*> q2 <*> q3 <*> q4) >>> keepWhen cond
...@@ -375,4 +375,3 @@ leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9 ...@@ -375,4 +375,3 @@ leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9
) cond67 ) cond67
) cond78 ) cond78
) cond89 ) cond89
...@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable ...@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
-- | Insert utils -- | Insert utils
insertContextNodeNgrams :: [ContextNodeNgrams] -> Cmd err Int insertContextNodeNgrams :: [ContextNodeNgrams] -> Cmd err Int
insertContextNodeNgrams = insertContextNodeNgramsW insertContextNodeNgrams = insertContextNodeNgramsW
. map (\(ContextNodeNgrams c n ng nt w) -> . map (\(ContextNodeNgrams c n ng nt w dc) ->
ContextNodeNgrams (pgContextId c) ContextNodeNgrams (pgContextId c)
(pgNodeId n) (pgNodeId n)
(sqlInt4 ng) (sqlInt4 ng)
(pgNgramsTypeId nt) (pgNgramsTypeId nt)
(sqlDouble w) (sqlDouble w)
(sqlInt4 dc)
) )
insertContextNodeNgramsW :: [ContextNodeNgramsWrite] -> Cmd err Int insertContextNodeNgramsW :: [ContextNodeNgramsWrite] -> Cmd err Int
......
...@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >> ...@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64 updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64
updateHyperdataQuery i h = seq h' $ trace "updateHyperdataQuery: encoded JSON" $ Update updateHyperdataQuery i h = seq h' $ trace "updateHyperdataQuery: encoded JSON" $ Update
{ uTable = nodeTable { uTable = nodeTable
, uUpdateWith = updateEasy (\ (Node _ni _nh _nt _nu _np _nn _nd _h) , uUpdateWith = updateEasy (\ (Node { .. })
-> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h' -> Node { _node_hyperdata = h', .. }
-- -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
) )
, uWhere = (\row -> {-trace "uWhere" $-} _node_id row .== pgNodeId i ) , uWhere = (\row -> {-trace "uWhere" $-} _node_id row .== pgNodeId i )
, uReturning = rCount , uReturning = rCount
...@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err ...@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
updateNodesWithType_ nt h = do updateNodesWithType_ nt h = do
ns <- getNodesIdWithType nt ns <- getNodesIdWithType nt
mapM (\n -> updateHyperdata n h) ns mapM (\n -> updateHyperdata n h) ns
...@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams ...@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
where where
import Prelude import Prelude
import Gargantext.Core.Types (TermsCount)
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId) import Gargantext.Database.Schema.Ngrams (NgramsTypeId, NgramsId)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
...@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node ...@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node
type ContextNodeNgrams = type ContextNodeNgrams =
ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double ContextNodeNgramsPoly ContextId ListId NgramsId NgramsTypeId Double TermsCount
data ContextNodeNgramsPoly c n ngrams_id ngt w data ContextNodeNgramsPoly c n ngrams_id ngt w dc
= ContextNodeNgrams { _cnng_context_id :: !c = ContextNodeNgrams { _cnng_context_id :: !c
, _cnng_node_id :: !n , _cnng_node_id :: !n
, _cnng_ngrams_id :: !ngrams_id , _cnng_ngrams_id :: !ngrams_id
, _cnng_ngramsType :: !ngt , _cnng_ngramsType :: !ngt
, _cnng_weight :: !w , _cnng_weight :: !w
, _cnng_doc_count :: !dc
} deriving (Show) } deriving (Show)
type ContextNodeNgramsWrite = type ContextNodeNgramsWrite =
...@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite = ...@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
(Column SqlInt4 ) (Column SqlInt4 )
(Column SqlInt4 ) (Column SqlInt4 )
(Column SqlFloat8) (Column SqlFloat8)
(Column SqlInt4 )
type ContextNodeNgramsRead = type ContextNodeNgramsRead =
ContextNodeNgramsPoly (Column SqlInt4 ) ContextNodeNgramsPoly (Column SqlInt4 )
...@@ -50,6 +53,7 @@ type ContextNodeNgramsRead = ...@@ -50,6 +53,7 @@ type ContextNodeNgramsRead =
(Column SqlInt4 ) (Column SqlInt4 )
(Column SqlInt4 ) (Column SqlInt4 )
(Column SqlFloat8) (Column SqlFloat8)
(Column SqlInt4 )
type ContextNodeNgramsReadNull = type ContextNodeNgramsReadNull =
ContextNodeNgramsPoly (Column (Nullable SqlInt4 )) ContextNodeNgramsPoly (Column (Nullable SqlInt4 ))
...@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull = ...@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
(Column (Nullable SqlInt4 )) (Column (Nullable SqlInt4 ))
(Column (Nullable SqlInt4 )) (Column (Nullable SqlInt4 ))
(Column (Nullable SqlFloat8)) (Column (Nullable SqlFloat8))
(Column (Nullable SqlInt4 ))
$(makeAdaptorAndInstance "pContextNodeNgrams" ''ContextNodeNgramsPoly) $(makeAdaptorAndInstance "pContextNodeNgrams" ''ContextNodeNgramsPoly)
makeLenses ''ContextNodeNgramsPoly makeLenses ''ContextNodeNgramsPoly
...@@ -70,5 +75,6 @@ contextNodeNgramsTable = Table "context_node_ngrams" ...@@ -70,5 +75,6 @@ contextNodeNgramsTable = Table "context_node_ngrams"
, _cnng_ngrams_id = requiredTableField "ngrams_id" , _cnng_ngrams_id = requiredTableField "ngrams_id"
, _cnng_ngramsType = requiredTableField "ngrams_type" , _cnng_ngramsType = requiredTableField "ngrams_type"
, _cnng_weight = requiredTableField "weight" , _cnng_weight = requiredTableField "weight"
, _cnng_doc_count = requiredTableField "doc_count"
} }
) )
{-| {-|
Module : Gargantext.Database.Schema.NodeNgrams Module : Gargantext.Database.Schema.NodeNgrams
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -86,7 +86,7 @@ type NgramsClass = Int ...@@ -86,7 +86,7 @@ type NgramsClass = Int
type NgramsText = Text type NgramsText = Text
-- Example of list Ngrams -- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text -- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
type NodeNgramsW = type NodeNgramsW =
NodeNgramsPoly (Maybe NodeNgramsId) NodeId ListType NgramsText NodeNgramsPoly (Maybe NodeNgramsId) NodeId ListType NgramsText
......
...@@ -83,7 +83,7 @@ extra-deps: ...@@ -83,7 +83,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b commit: 3db385e767d2100d8abe900833c6e7de3ac55e1b
- git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/arxiv-api.git
commit: 4d6ae5aad435c00cdae1d47ebb5281d13d7b172c commit: a2d78abeaec9315be765b90d5e51a4a50c48e7b8
#- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git #- git: https://gitlab.iscpif.fr/cgenie/arxiv-api.git
#- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588 #- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment