Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f42da79b
Commit
f42da79b
authored
Jul 25, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-250-docsOccurrences' into dev-merge
parents
1cb71785
bfd0f6b8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
83 deletions
+88
-83
List.hs
src/Gargantext/API/Ngrams/List.hs
+4
-67
Update.hs
src/Gargantext/API/Node/Update.hs
+1
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+83
-15
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
f42da79b
...
...
@@ -16,39 +16,29 @@ Portability : POSIX
module
Gargantext.API.Ngrams.List
where
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map.Strict
(
Map
,
toList
)
import
Data.Maybe
(
catMaybes
,
fromMaybe
)
import
Data.Set
(
Set
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
,
concat
,
pack
,
splitOn
)
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.EnvTypes
(
Env
,
GargJob
(
..
))
import
Gargantext.Core
(
Lang
,
withDefaultLanguage
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Prelude
(
GargServer
,
GargM
,
GargError
)
import
Gargantext.API.Types
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatternsWith
,
termsInText
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
saveDocNgrams
With
)
import
Gargantext.Database.Action.Flow
(
reIndex
With
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
-- import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getNode
,
getNodeWith
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Query.Table.Node
(
getNode
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
(
_node_parent_id
,
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
_node_parent_id
)
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Utils.Jobs
(
serveJobsAPI
,
MonadJobStatus
(
..
))
...
...
@@ -56,16 +46,13 @@ import Servant
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Data.Csv
as
Csv
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vec
import
qualified
Gargantext.Database.Query.Table.Ngrams
as
TableNgrams
import
qualified
Gargantext.Utils.Servant
as
GUS
import
qualified
Prelude
import
qualified
Protolude
as
P
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
------------------------------------------------------------------------
type
GETAPI
=
Summary
"Get List"
:>
"lists"
...
...
@@ -145,56 +132,6 @@ setList l m = do
pure
True
------------------------------------------------------------------------
-- | Re-index documents of a corpus with ngrams in the list
reIndexWith
::
(
HasNodeStory
env
err
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
ListId
->
NgramsType
->
Set
ListType
->
m
()
reIndexWith
cId
lId
nt
lts
=
do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
corpus_node
<-
getNodeWith
cId
(
Proxy
@
HyperdataCorpus
)
let
corpusLang
=
withDefaultLanguage
$
view
(
node_hyperdata
.
to
_hc_lang
)
corpus_node
-- Getting [NgramsTerm]
ts
<-
List
.
concat
<$>
map
(
\
(
k
,
vs
)
->
k
:
vs
)
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
let
-- fromListWith (<>)
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
docNgrams
corpusLang
nt
ts
)
docs
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
pure
()
docNgrams
::
Lang
->
NgramsType
->
[
NgramsTerm
]
->
Gargantext
.
Database
.
Admin
.
Types
.
Node
.
Context
HyperdataDocument
->
[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]
docNgrams
lang
nt
ts
doc
=
List
.
zip
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
Text
.
unlines
$
catMaybes
[
doc
^.
context_hyperdata
.
hd_title
,
doc
^.
context_hyperdata
.
hd_abstract
]
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context_id
)
1
)]])
toIndexedNgrams
::
HashMap
Text
NgramsId
->
Text
->
Maybe
(
Indexed
Int
Ngrams
)
toIndexedNgrams
m
t
=
Indexed
<$>
i
<*>
n
...
...
src/Gargantext/API/Node/Update.hs
View file @
f42da79b
...
...
@@ -25,7 +25,6 @@ import GHC.Generics (Generic)
import
Gargantext.API.Admin.EnvTypes
(
GargJob
(
..
),
Env
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Ngrams.List
(
reIndexWith
)
--import Gargantext.API.Ngrams.Types (TabType(..))
import
Gargantext.API.Prelude
(
GargM
,
GargError
,
simuLogs
)
import
Gargantext.Core.Methods.Similarities
(
GraphMetric
(
..
))
...
...
@@ -36,6 +35,7 @@ import Gargantext.Core.Viz.Graph.Types (Strength)
import
Gargantext.Core.Viz.Phylo
(
PhyloSubConfigAPI
(
..
),
subConfigAPI2config
)
import
Gargantext.Core.Viz.Phylo.API.Tools
(
flowPhyloAPI
)
-- import Gargantext.Database.Action.Mail (sendMail)
import
Gargantext.Database.Action.Flow
(
reIndexWith
)
import
Gargantext.Database.Action.Flow.Pairing
(
pairing
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
f42da79b
...
...
@@ -22,6 +22,8 @@ Portability : POSIX
{-# LANGUAGE InstanceSigs #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.Database.Action.Flow
-- (flowDatabase, ngrams2list)
(
DataText
(
..
)
...
...
@@ -37,6 +39,9 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
,
insertMasterDocs
,
saveDocNgramsWith
,
reIndexWith
,
docNgrams
,
getOrMkRoot
,
getOrMk_RootWithCorpus
,
TermType
(
..
)
...
...
@@ -49,10 +54,9 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import
Conduit
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
,
over
,
traverse
)
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Conduit.Internal
(
zipSources
)
import
qualified
Data.Conduit.List
as
CList
import
Data.Either
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
...
...
@@ -60,22 +64,18 @@ import Data.List (concat)
import
Data.Map.Strict
(
Map
,
lookup
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
import
Data.Proxy
import
Data.Set
(
Set
)
import
Data.Swagger
import
qualified
Data.Text
as
T
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Conduit.List
as
CL
import
qualified
Data.Conduit
as
C
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.Core
(
Lang
(
..
),
PosTagAlgo
(
..
))
-- import Gargantext.Core.Ext.IMT (toSchoolNam
e)
import
Gargantext.Core
(
withDefaultLanguag
e
)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.NLP
(
nlpServerGet
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.Text
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
,
FileType
,
splitOn
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
...
...
@@ -83,6 +83,7 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
))
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatternsWith
,
termsInText
)
import
Gargantext.Core.Types
(
POS
(
NP
),
TermsCount
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
...
...
@@ -92,9 +93,9 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import
Gargantext.Database.Action.Flow.List
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
,
DocumentIdWithNgrams
(
..
))
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
import
Gargantext.Database.Action.Search
(
searchDocInDatabase
)
import
Gargantext.Database.Admin.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
...
...
@@ -103,17 +104,29 @@ import Gargantext.Database.Query.Table.Ngrams
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.Node.Document.Insert
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
getOrMkRoot
,
getOrMk_RootWithCorpus
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
),
node_id
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
import
Gargantext.Utils.Jobs
(
JobHandle
,
MonadJobStatus
(
..
))
import
System.FilePath
(
FilePath
)
import
qualified
Data.Conduit
as
C
import
qualified
Data.Conduit.List
as
CL
import
qualified
Data.Conduit.List
as
CList
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
T
import
qualified
Gargantext.API.Ngrams.Types
as
NT
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Doc
(
add
)
import
qualified
PUBMED.Types
as
PUBMED
--import qualified Prelude
------------------------------------------------------------------------
-- Imports for upgrade function
...
...
@@ -339,7 +352,7 @@ createNodes user corpusName ctype = do
-- User Graph Flow
_
<-
insertDefaultNodeIfNotExists
NodeGraph
userCorpusId
userId
_
<-
insertDefaultNodeIfNotExists
NodeDashboard
userCorpusId
userId
--
_ <- insertDefaultNodeIfNotExists NodeDashboard userCorpusId userId
pure
(
userId
,
userCorpusId
,
listId
)
...
...
@@ -376,10 +389,11 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do
_mastListId
<-
getOrMkList
masterCorpusId
masterUserId
pure
()
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
_
<-
reIndexWith
userCorpusId
listId
NgramsTerms
(
Set
.
singleton
MapTerm
)
_
<-
updateContextScore
userCorpusId
(
Just
listId
)
_
<-
updateNgramsOccurrences
userCorpusId
(
Just
listId
)
pure
userCorpusId
...
...
@@ -612,3 +626,57 @@ extractInsert docs = do
documentsWithId
_
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
pure
()
-- | Re-index documents of a corpus with ngrams in the list
reIndexWith
::
(
HasNodeStory
env
err
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
ListId
->
NgramsType
->
Set
ListType
->
m
()
reIndexWith
cId
lId
nt
lts
=
do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
corpus_node
<-
getNodeWith
cId
(
Proxy
@
HyperdataCorpus
)
let
corpusLang
=
withDefaultLanguage
$
view
(
node_hyperdata
.
to
_hc_lang
)
corpus_node
-- Getting [NgramsTerm]
ts
<-
List
.
concat
<$>
map
(
\
(
k
,
vs
)
->
k
:
vs
)
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
let
-- fromListWith (<>)
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
Map
.
unionWith
(
Map
.
unionWith
(
\
(
_a
,
b
)
(
_a'
,
b'
)
->
(
1
,
b
+
b'
)))))
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
docNgrams
corpusLang
nt
ts
)
docs
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
pure
()
docNgrams
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
Gargantext
.
Database
.
Admin
.
Types
.
Node
.
Context
HyperdataDocument
->
[((
MatchedText
,
TermsCount
),
Map
NgramsType
(
Map
NodeId
Int
))]
docNgrams
lang
nt
ts
doc
=
List
.
zip
(
termsInText
lang
(
buildPatternsWith
lang
ts
)
$
T
.
unlines
$
catMaybes
[
doc
^.
context_hyperdata
.
hd_title
,
doc
^.
context_hyperdata
.
hd_abstract
]
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context_id
)
1
)]])
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment