Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
72f5f35c
Commit
72f5f35c
authored
Mar 22, 2024
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/169-dev-singulars-plurals' into dev
parents
bffbc619
445ba8f1
Changes
53
Hide whitespace changes
Inline
Side-by-side
Showing
53 changed files
with
502 additions
and
483 deletions
+502
-483
Main.hs
bin/gargantext-phylo/Main.hs
+1
-1
Common.hs
bin/gargantext-phylo/Phylo/Common.hs
+1
-1
gargantext.cabal
gargantext.cabal
+9
-8
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+12
-13
List.hs
src/Gargantext/API/Ngrams/List.hs
+8
-9
Prelude.hs
src/Gargantext/API/Ngrams/Prelude.hs
+2
-2
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+3
-4
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+10
-10
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+3
-2
Types.hs
src/Gargantext/API/Node/Corpus/Export/Types.hs
+4
-4
Update.hs
src/Gargantext/API/Node/Update.hs
+10
-11
Types.hs
src/Gargantext/Core/Flow/Types.hs
+2
-2
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+10
-10
DB.hs
src/Gargantext/Core/NodeStory/DB.hs
+7
-10
Types.hs
src/Gargantext/Core/NodeStory/Types.hs
+6
-6
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+2
-2
List.hs
src/Gargantext/Core/Text/List.hs
+47
-40
Group.hs
src/Gargantext/Core/Text/List/Group.hs
+6
-12
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+7
-8
Social.hs
src/Gargantext/Core/Text/List/Social.hs
+5
-6
Patch.hs
src/Gargantext/Core/Text/List/Social/Patch.hs
+4
-5
Ngrams.hs
src/Gargantext/Core/Text/Ngrams.hs
+109
-0
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+20
-19
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+5
-9
Group.hs
src/Gargantext/Core/Text/Terms/Multi/Group.hs
+1
-1
En.hs
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
+3
-4
Chart.hs
src/Gargantext/Core/Viz/Chart.hs
+14
-15
Graph.hs
src/Gargantext/Core/Viz/Graph.hs
+1
-3
API.hs
src/Gargantext/Core/Viz/Graph/API.hs
+12
-12
Tools.hs
src/Gargantext/Core/Viz/Graph/Tools.hs
+8
-7
Types.hs
src/Gargantext/Core/Viz/Graph/Types.hs
+2
-3
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+12
-13
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+10
-10
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+35
-29
Extract.hs
src/Gargantext/Database/Action/Flow/Extract.hs
+11
-7
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+3
-6
Pairing.hs
src/Gargantext/Database/Action/Flow/Pairing.hs
+11
-13
Types.hs
src/Gargantext/Database/Action/Flow/Types.hs
+9
-9
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+4
-6
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+4
-3
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+1
-3
Search.hs
src/Gargantext/Database/Action/Search.hs
+11
-10
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+1
-1
NgramsPostag.hs
src/Gargantext/Database/Query/Table/NgramsPostag.hs
+6
-5
NodeContext.hs
src/Gargantext/Database/Query/Table/NodeContext.hs
+6
-4
NodeNgrams.hs
src/Gargantext/Database/Query/Table/NodeNgrams.hs
+2
-2
NodeNode.hs
src/Gargantext/Database/Query/Table/NodeNode.hs
+5
-3
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+15
-96
NgramsPostag.hs
src/Gargantext/Database/Schema/NgramsPostag.hs
+2
-2
NodeNgrams.hs
src/Gargantext/Database/Schema/NodeNgrams.hs
+3
-2
SpacyNLP.hs
src/Gargantext/Utils/SpacyNLP.hs
+14
-16
UpdateList.hs
test/Test/API/UpdateList.hs
+2
-3
NodeStory.hs
test/Test/Database/Operations/NodeStory.hs
+1
-1
No files found.
bin/gargantext-phylo/Main.hs
View file @
72f5f35c
...
...
@@ -34,6 +34,7 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
qualified
as
Csv
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
...
...
@@ -42,7 +43,6 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Prelude
qualified
import
System.Directory
(
listDirectory
,
doesFileExist
)
...
...
bin/gargantext-phylo/Phylo/Common.hs
View file @
72f5f35c
...
...
@@ -18,13 +18,13 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
qualified
as
Csv
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
hiding
(
hash
,
replace
)
import
Prelude
qualified
import
System.Directory
(
listDirectory
)
...
...
gargantext.cabal
View file @
72f5f35c
...
...
@@ -100,9 +100,9 @@ library
Gargantext.API
Gargantext.API.Admin.Auth.Types
Gargantext.API.Admin.EnvTypes
Gargantext.API.Admin.Orchestrator.Types
Gargantext.API.Admin.Settings
Gargantext.API.Admin.Settings.CORS
Gargantext.API.Admin.Orchestrator.Types
Gargantext.API.Admin.Types
Gargantext.API.Auth.PolicyCheck
Gargantext.API.Dev
...
...
@@ -141,18 +141,21 @@ library
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.EPO
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.API.OpenAlex
Gargantext.Core.Text.Corpus.
Query
Gargantext.Core.Text.Corpus.
API.Pubmed
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Ngrams
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms
...
...
@@ -170,8 +173,8 @@ library
Gargantext.Core.Types
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main
Gargantext.Core.Types.Query
Gargantext.Core.Types.Phylo
Gargantext.Core.Types.Query
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix
Gargantext.Core.Viz.Graph
...
...
@@ -189,6 +192,7 @@ library
Gargantext.Core.Viz.Types
Gargantext.Database.Action.Flow
Gargantext.Database.Action.Flow.Types
Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Search
Gargantext.Database.Action.User
Gargantext.Database.Action.User.New
...
...
@@ -205,8 +209,8 @@ library
Gargantext.Database.Query.Table.Node
Gargantext.Database.Query.Table.Node.Error
Gargantext.Database.Query.Table.Node.UpdateOpaleye
Gargantext.Database.Query.Tree.Root
Gargantext.Database.Query.Table.User
Gargantext.Database.Query.Tree.Root
Gargantext.Database.Schema.Ngrams
Gargantext.Database.Schema.Node
Gargantext.Database.Schema.User
...
...
@@ -309,11 +313,9 @@ library
Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
Gargantext.Core.Text.Corpus.Parsers.Wikimedia
Gargantext.Core.Text.Learn
Gargantext.Core.Text.List
Gargantext.Core.Text.List.Group
Gargantext.Core.Text.List.Group.Prelude
Gargantext.Core.Text.List.Group.WithScores
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Learn
Gargantext.Core.Text.List.Merge
Gargantext.Core.Text.List.Social
...
...
@@ -365,7 +367,6 @@ library
Gargantext.Database.Action.Metrics
Gargantext.Database.Action.Metrics.Lists
Gargantext.Database.Action.Metrics.NgramsByContext
Gargantext.Database.Action.Metrics.TFICF
Gargantext.Database.Action.Node
Gargantext.Database.Action.Share
Gargantext.Database.Action.TSQuery
...
...
src/Gargantext/API/Ngrams.hs
View file @
72f5f35c
...
...
@@ -19,11 +19,10 @@ add get
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.Ngrams
...
...
@@ -106,13 +105,13 @@ import Gargantext.API.Ngrams.Tools (getNodeStory)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Prelude
(
GargM
)
import
Gargantext.Core.NodeStory
(
ArchiveList
,
HasNodeStory
,
HasNodeArchiveStoryImmediateSaver
(
..
),
HasNodeStoryImmediateSaver
(
..
),
NgramsStatePatch
'
,
a_history
,
a_state
,
a_version
,
currentVersion
)
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Types
(
ListType
(
..
),
NodeId
,
ListId
,
DocId
,
TODO
,
assertValid
,
HasValidationError
,
ContextId
)
import
Gargantext.Core.Types.Query
(
Limit
(
..
),
Offset
(
..
),
MinSize
(
..
),
MaxSize
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
(
text2ngrams
,
Ngrams
,
insertNgrams
,
selectNgramsByDoc
)
import
Gargantext.Database.Schema.Ngrams
qualified
as
TableNgrams
import
Gargantext.Database.Query.Table.Ngrams
(
text2ngrams
,
insertNgrams
,
selectNgramsByDoc
)
import
Gargantext.Database.Query.Table.Node
(
getNode
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
...
...
@@ -179,7 +178,7 @@ listTypeConflictResolution :: ListType -> ListType -> ListType
listTypeConflictResolution
_
_
=
undefined
-- TODO Use Map User ListType
ngramsStatePatchConflictResolution
::
TableNgrams
.
NgramsType
ngramsStatePatchConflictResolution
::
NgramsType
->
NgramsTerm
->
ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution
_ngramsType
_ngramsTerm
...
...
@@ -234,7 +233,7 @@ addListNgrams listId ngramsType nes = do
-- UNSAFE
setListNgrams
::
HasNodeStory
env
err
m
=>
NodeId
->
TableNgrams
.
NgramsType
->
NgramsType
->
Map
NgramsTerm
NgramsRepoElement
->
m
()
setListNgrams
listId
ngramsType
ns
=
do
...
...
@@ -357,7 +356,7 @@ commitStatePatch listId (Versioned _p_version p) = do
-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull
::
HasNodeStory
env
err
m
=>
ListId
->
TableNgrams
.
NgramsType
->
NgramsType
->
Version
->
m
(
Versioned
NgramsTablePatch
)
tableNgramsPull
listId
ngramsType
p_version
=
do
...
...
@@ -487,7 +486,7 @@ tableNgramsPostChartsAsync utn jobHandle = do
getNgramsTableMap
::
HasNodeStory
env
err
m
=>
NodeId
->
TableNgrams
.
NgramsType
->
NgramsType
->
m
(
Versioned
NgramsTableMap
)
getNgramsTableMap
nodeId
ngramsType
=
do
a
<-
getNodeStory
nodeId
...
...
@@ -498,7 +497,7 @@ getNgramsTableMap nodeId ngramsType = do
dumpJsonTableMap
::
HasNodeStory
env
err
m
=>
Text
->
NodeId
->
TableNgrams
.
NgramsType
->
NgramsType
->
m
()
dumpJsonTableMap
fpath
nodeId
ngramsType
=
do
m
<-
getNgramsTableMap
nodeId
ngramsType
...
...
@@ -617,7 +616,7 @@ getNgramsTable' :: forall env err m.
,
HasNodeError
err
)
=>
NodeId
->
ListId
->
TableNgrams
.
NgramsType
->
NgramsType
->
m
(
Versioned
(
Map
.
Map
NgramsTerm
NgramsElement
))
getNgramsTable'
nId
listId
ngramsType
=
do
tableMap
<-
getNgramsTableMap
listId
ngramsType
...
...
@@ -631,7 +630,7 @@ setNgramsTableScores :: forall env err m t.
,
HasNodeError
err
)
=>
NodeId
->
ListId
->
TableNgrams
.
NgramsType
->
NgramsType
->
t
->
m
t
setNgramsTableScores
nId
listId
ngramsType
table
=
do
...
...
@@ -821,7 +820,7 @@ apiNgramsAsync _dId =
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince
::
HasNodeStory
env
err
m
=>
ListId
->
TableNgrams
.
NgramsType
->
Version
->
m
(
Versioned
Bool
)
=>
ListId
->
NgramsType
->
Version
->
m
(
Versioned
Bool
)
listNgramsChangedSince
listId
ngramsType
version
|
version
<
0
=
Versioned
<$>
currentVersion
listId
<*>
pure
True
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
72f5f35c
...
...
@@ -10,7 +10,6 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
...
...
@@ -29,21 +28,21 @@ import Data.Text (concat, pack, splitOn)
import
Data.Vector
(
Vector
)
import
Data.Vector
qualified
as
Vec
import
Gargantext.API.Admin.EnvTypes
(
Env
,
GargJob
(
..
))
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Errors.Types
import
Gargantext.API.Admin.Orchestrator.Types
(
AsyncJobs
,
JobLog
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Prelude
(
GargServer
,
GargM
,
serverError
,
HasServerError
)
import
Gargantext.API.Types
import
Gargantext.Core.NodeStory
import
Gargantext.API.Types
(
HTML
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
(
NgramsTerms
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Action.Flow
(
reIndexWith
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
qualified
as
TableNgrams
import
Gargantext.Database.Admin.Types.Node
(
NodeId
(
_NodeId
),
ListId
)
import
Gargantext.Database.Query.Table.Node
(
getNode
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
,
NgramsId
)
import
Gargantext.Database.Schema.Node
(
_node_parent_id
)
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
hiding
(
concat
,
toList
)
...
...
@@ -113,7 +112,7 @@ getCsv :: HasNodeStory env err m
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsTableMap
)
getCsv
lId
=
do
lst
<-
getNgramsList
lId
pure
$
case
Map
.
lookup
TableNgrams
.
NgramsTerms
lst
of
pure
$
case
Map
.
lookup
NgramsTerms
lst
of
Nothing
->
noHeader
Map
.
empty
Just
(
Versioned
{
_v_data
})
->
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
...
...
src/Gargantext/API/Ngrams/Prelude.hs
View file @
72f5f35c
...
...
@@ -25,9 +25,9 @@ import Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.List.Social.Prelude
(
unPatchMapToHashMap
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
,
ngramsTypes
)
import
Gargantext.Core.Types.Main
(
ListType
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
ngramsTypes
)
import
Gargantext.Prelude
...
...
@@ -61,7 +61,7 @@ toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl
where
toTerm
=
Text
.
splitOn
" "
.
unNgramsTerm
(
roots
,
children
)
=
List
.
partition
(
\
(
_t
,
nre
)
->
view
nre_root
nre
==
Nothing
)
(
roots
,
children
)
=
List
.
partition
(
\
(
_t
,
nre
)
->
isNothing
(
view
nre_root
nre
)
)
$
List
.
filter
(
\
(
_t
,
nre
)
->
view
nre_list
nre
==
lt''
)
ns
roots'
=
map
(
\
(
t
,
nre
)
->
(
t
,
map
toTerm
$
unMSet
$
view
nre_children
nre
))
roots
...
...
src/Gargantext/API/Ngrams/Tools.hs
View file @
72f5f35c
...
...
@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use infix" #-}
module
Gargantext.API.Ngrams.Tools
where
...
...
@@ -24,11 +23,11 @@ import Data.HashMap.Strict qualified as HM
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
-- import GHC.Conc (TVar, readTVar)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
unNgramsTerm
),
NgramsRepoElement
(
_nre_root
,
_nre_list
)
)
import
Gargantext.Core.NodeStory.Types
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
)
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
,
ListId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
72f5f35c
...
...
@@ -37,12 +37,12 @@ import Data.Validity ( Validity(..) )
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
,
fromJSONField
)
import
Database.PostgreSQL.Simple.ToField
(
ToField
,
toJSONField
,
toField
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.Ngrams
qualified
as
Ngrams
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
TODO
)
import
Gargantext.Core.Types.Query
(
Limit
,
Offset
,
MaxSize
,
MinSize
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixUntagged
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Database.Admin.Types.Node
(
ContextId
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Database.Schema.Ngrams
qualified
as
TableNgrams
import
Gargantext.Prelude
hiding
(
IsString
,
hash
,
from
,
replace
,
to
)
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
import
Gargantext.Utils.Servant
(
CSV
,
ZIP
)
...
...
@@ -551,7 +551,7 @@ instance ToField NgramsTablePatch
where
toField
=
toJSONField
instance
FromField
(
PatchMap
Table
Ngrams
.
NgramsType
(
PatchMap
NodeId
NgramsTablePatch
))
instance
FromField
(
PatchMap
Ngrams
.
NgramsType
(
PatchMap
NodeId
NgramsTablePatch
))
where
fromField
=
fromField'
...
...
@@ -747,21 +747,21 @@ type RepoCmdM env err m =
-- Instances
instance
FromHttpApiData
(
Map
Table
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
instance
FromHttpApiData
(
Map
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
where
parseUrlPiece
x
=
maybeToEither
x
(
decode
$
cs
x
)
instance
ToHttpApiData
(
Map
Table
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
where
instance
ToHttpApiData
(
Map
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
where
toUrlPiece
m
=
cs
(
encode
m
)
ngramsTypeFromTabType
::
TabType
->
Table
Ngrams
.
NgramsType
ngramsTypeFromTabType
::
TabType
->
Ngrams
.
NgramsType
ngramsTypeFromTabType
tabType
=
let
here
=
"Garg.API.Ngrams: "
::
Text
in
case
tabType
of
Sources
->
Table
Ngrams
.
Sources
Authors
->
Table
Ngrams
.
Authors
Institutes
->
Table
Ngrams
.
Institutes
Terms
->
Table
Ngrams
.
NgramsTerms
Sources
->
Ngrams
.
Sources
Authors
->
Ngrams
.
Authors
Institutes
->
Ngrams
.
Institutes
Terms
->
Ngrams
.
NgramsTerms
_
->
panicTrace
$
here
<>
"No Ngrams for this tab"
-- TODO: This `panic` would disapear with custom NgramsType.
...
...
@@ -784,7 +784,7 @@ instance ToSchema UpdateTableNgramsCharts where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_utn_"
)
------------------------------------------------------------------------
type
NgramsList
=
(
Map
Table
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
type
NgramsList
=
(
Map
Ngrams
.
NgramsType
(
Versioned
NgramsTableMap
))
-- | Same as NgramsList, but wraps node_id so that the inner .json file can have proper name
...
...
src/Gargantext/API/Node/Corpus/Export.hs
View file @
72f5f35c
...
...
@@ -27,7 +27,9 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import
Gargantext.API.Node.Document.Export.Types
qualified
as
DocumentExport
import
Gargantext.API.Prelude
(
GargNoServer
)
import
Gargantext.Core.NodeStory.Types
(
NodeListStory
)
import
Gargantext.Core.Types
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getNgramsByContextOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
...
...
@@ -37,7 +39,6 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Context
(
_context_id
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
hiding
(
hash
)
import
Gargantext.Prelude.Crypto.Hash
(
hash
)
import
Servant
(
Headers
,
Header
,
addHeader
)
...
...
src/Gargantext/API/Node/Corpus/Export/Types.hs
View file @
72f5f35c
...
...
@@ -14,13 +14,13 @@ Portability : POSIX
module
Gargantext.API.Node.Corpus.Export.Types
where
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Swagger
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
,
ToParamSchema
(
..
)
)
import
Data.Text
(
Text
)
import
GHC.Generics
(
Generic
)
import
qualified
Gargantext.API.Node.Document.Export.Types
as
DocumentExport
import
Gargantext.Core.Types
import
Gargantext.API.Node.Document.Export.Types
qualified
as
DocumentExport
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
(
ListId
,
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Servant
...
...
src/Gargantext/API/Node/Update.hs
View file @
72f5f35c
...
...
@@ -9,9 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.API.Node.Update
where
...
...
@@ -19,16 +18,17 @@ module Gargantext.API.Node.Update
import
Control.Lens
(
view
)
import
Data.Aeson
import
Data.Set
qualified
as
Set
import
Data.Swagger
import
Data.Swagger
(
ToSchema
)
import
Gargantext.API.Admin.EnvTypes
(
GargJob
(
..
),
Env
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Errors.Types
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.API.Metrics
qualified
as
Metrics
import
Gargantext.API.Ngrams.Types
qualified
as
NgramsTypes
import
Gargantext.API.Prelude
(
GargM
,
simuLogs
)
import
Gargantext.Core.Methods.Similarities
(
GraphMetric
(
..
))
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
NgramsTerms
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Graph.API
(
recomputeGraph
)
import
Gargantext.Core.Viz.Graph.Tools
(
PartitionMethod
(
..
),
BridgenessMethod
(
..
))
...
...
@@ -38,20 +38,19 @@ import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import
Gargantext.Database.Action.Flow
(
reIndexWith
)
import
Gargantext.Database.Action.Flow.Pairing
(
pairing
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata
.Phylo
(
HyperdataPhylo
(
HyperdataPhylo
)
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
,
NodeType
(
NodeCorpus
,
NodeAnnuaire
)
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNode
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
NgramsTerms
))
import
Gargantext.Database.Schema.Node
(
node_parent_id
)
import
Gargantext.Prelude
import
Gargantext.System.Logging
(
MonadLogger
)
import
Gargantext.Utils.Aeson
qualified
as
GUA
import
Gargantext.Utils.Jobs
(
serveJobsAPI
,
MonadJobStatus
(
..
))
import
Gargantext.Utils.UTCTime
(
timeMeasured
)
import
Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Gargantext.Utils.UTCTime
(
timeMeasured
)
import
Gargantext.System.Logging
import
Test.QuickCheck.Arbitrary
(
Arbitrary
(
arbitrary
)
)
------------------------------------------------------------------------
type
API
=
Summary
" Update node according to NodeType params"
...
...
src/Gargantext/Core/Flow/Types.hs
View file @
72f5f35c
...
...
@@ -14,8 +14,8 @@ Portability : POSIX
module
Gargantext.Core.Flow.Types
where
import
Control.Lens
import
Gargantext.Database.Admin.Types.Node
import
Control.Lens
(
Lens
'
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Schema.Node
(
node_hash_id
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
...
...
src/Gargantext/Core/NodeStory.hs
View file @
72f5f35c
...
...
@@ -73,10 +73,10 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory.DB
import
Gargantext.Core.NodeStory.Types
import
Gargantext.Core.Text.Ngrams
qualified
as
Ngrams
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
NodeId
(
..
)
)
import
Gargantext.Database.Admin.Config
()
import
Gargantext.Database.Prelude
(
HasConnectionPool
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
qualified
as
TableNgrams
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude.Database
(
runPGSAdvisoryXactLock
,
runPGSExecute
,
runPGSQuery
)
...
...
@@ -84,7 +84,7 @@ import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runP
getNodeStory'
::
PGS
.
Connection
->
NodeId
->
IO
ArchiveList
getNodeStory'
c
nId
=
do
--res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement]
res
<-
runPGSQuery
c
nodeStoriesQuery
(
PGS
.
Only
$
PGS
.
toField
nId
)
::
IO
[(
Version
,
Table
Ngrams
.
NgramsType
,
NgramsTerm
,
NgramsRepoElement
)]
res
<-
runPGSQuery
c
nodeStoriesQuery
(
PGS
.
Only
$
PGS
.
toField
nId
)
::
IO
[(
Version
,
Ngrams
.
NgramsType
,
NgramsTerm
,
NgramsRepoElement
)]
-- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id).
-- Need to create a map: {<node_id>: {<ngrams_type_id>: {<ngrams_id>: <data>}}}
let
dbData
=
map
(
\
(
version
,
ngramsType
,
ngrams
,
ngrams_repo_element
)
->
...
...
@@ -235,9 +235,9 @@ fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed
)
)
<$>
nsChildren
-- |
Sometimes, when we upload a new list, a child can be left withou
t
--
a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'.
-- |
(#281) Sometimes, when we upload a new list, a child can be lef
t
--
without a parent. Find such ngrams and set their 'root' and
-- '
parent' to '
Nothing'.
fixChildrenWithNoParent
::
NgramsState'
->
NgramsState'
fixChildrenWithNoParent
ns
=
archiveStateFromList
$
nsParents
<>
nsChildrenFixed
where
...
...
@@ -341,13 +341,13 @@ fixNodeStoryVersions = do
-- printDebug "[fixNodeStoryVersions] nIds" nIds
mapM_
(
\
(
PGS
.
Only
nId
)
->
do
-- printDebug "[fixNodeStoryVersions] nId" nId
updateVer
c
Table
Ngrams
.
Authors
nId
updateVer
c
Ngrams
.
Authors
nId
updateVer
c
Table
Ngrams
.
Institutes
nId
updateVer
c
Ngrams
.
Institutes
nId
updateVer
c
Table
Ngrams
.
Sources
nId
updateVer
c
Ngrams
.
Sources
nId
updateVer
c
Table
Ngrams
.
NgramsTerms
nId
updateVer
c
Ngrams
.
NgramsTerms
nId
pure
()
)
nIds
...
...
@@ -363,7 +363,7 @@ fixNodeStoryVersions = do
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ?
|]
updateVer
::
PGS
.
Connection
->
Table
Ngrams
.
NgramsType
->
Int64
->
IO
()
updateVer
::
PGS
.
Connection
->
Ngrams
.
NgramsType
->
Int64
->
IO
()
updateVer
c
ngramsType
nId
=
do
maxVer
<-
runPGSQuery
c
maxVerQuery
(
nId
,
ngramsType
)
::
IO
[
PGS
.
Only
(
Maybe
Int64
)]
case
maxVer
of
...
...
src/Gargantext/Core/NodeStory/DB.hs
View file @
72f5f35c
...
...
@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.NodeStory.DB
(
nodeExists
...
...
@@ -27,22 +26,20 @@ module Gargantext.Core.NodeStory.DB
where
import
Control.Lens
((
^.
))
import
Control.Monad.Except
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
qualified
as
Map
import
Data.Map.Strict.Patch
qualified
as
PM
import
Data.Monoid
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core
(
toDBid
)
import
Gargantext.Core.NodeStory.Types
import
Gargantext.Core.Types
(
NodeId
(
..
),
NodeType
)
import
Gargantext.Core.NodeStory.Types
(
a_state
,
a_version
,
ArchiveList
,
ArchiveStateList
,
NgramsStatePatch
'
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
(
..
),
NodeType
)
import
Gargantext.Database.Admin.Config
()
import
Gargantext.Database.Query.Table.Ngrams
qualified
as
TableNgrams
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Database.Schema.Ngrams
()
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude.Database
...
...
@@ -70,7 +67,7 @@ getNodesArchiveHistory :: PGS.Connection
->
IO
[(
NodeId
,
(
Map
NgramsType
[
HashMap
NgramsTerm
NgramsPatch
]))]
getNodesArchiveHistory
c
nodesId
=
do
as
<-
runPGSQuery
c
query
(
PGS
.
Only
$
Values
fields
nodesId
)
::
IO
[(
Int
,
TableNgrams
.
NgramsType
,
NgramsTerm
,
NgramsPatch
)]
::
IO
[(
Int
,
NgramsType
,
NgramsTerm
,
NgramsPatch
)]
pure
$
map
(
\
(
nId
,
ngramsType
,
terms
,
patch
)
->
(
UnsafeMkNodeId
nId
...
...
@@ -96,11 +93,11 @@ insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory
c
nodeId
version
(
h
:
hs
)
=
do
let
tuples
=
mconcat
$
(
\
(
nType
,
NgramsTablePatch
patch
)
->
(
\
(
term
,
p
)
->
(
nodeId
,
nType
,
term
,
p
))
<$>
PM
.
toList
patch
)
<$>
PM
.
toList
h
::
[(
NodeId
,
TableNgrams
.
NgramsType
,
NgramsTerm
,
NgramsPatch
)]
(
nodeId
,
nType
,
term
,
p
))
<$>
PM
.
toList
patch
)
<$>
PM
.
toList
h
::
[(
NodeId
,
NgramsType
,
NgramsTerm
,
NgramsPatch
)]
tuplesM
<-
mapM
(
\
(
nId
,
nType
,
term
,
patch
)
->
do
[
PGS
.
Only
ngramsId
]
<-
runPGSReturning
c
qInsert
[
PGS
.
Only
term
]
::
IO
[
PGS
.
Only
Int
]
pure
(
nId
,
nType
,
ngramsId
,
term
,
patch
)
)
tuples
::
IO
[(
NodeId
,
TableNgrams
.
NgramsType
,
Int
,
NgramsTerm
,
NgramsPatch
)]
)
tuples
::
IO
[(
NodeId
,
NgramsType
,
Int
,
NgramsTerm
,
NgramsPatch
)]
_
<-
runPGSExecuteMany
c
query
$
((
\
(
nId
,
nType
,
termId
,
_term
,
patch
)
->
(
nId
,
nType
,
termId
,
patch
,
version
))
<$>
tuplesM
)
_
<-
insertNodeArchiveHistory
c
nodeId
version
hs
pure
()
...
...
src/Gargantext/Core/NodeStory/Types.hs
View file @
72f5f35c
...
...
@@ -56,10 +56,10 @@ import Data.Set qualified as Set
import
Database.PostgreSQL.Simple.FromField
(
FromField
(
fromField
),
fromJSONField
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Database.Admin.Types.Node
(
NodeId
(
..
)
)
import
Gargantext.Core.Text.Ngrams
qualified
as
Ngrams
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Admin.Config
()
import
Gargantext.Database.Prelude
(
DbCmd
'
)
import
Gargantext.Database.Schema.Ngrams
qualified
as
TableNgrams
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Prelude
hiding
(
to
)
import
Opaleye
(
DefaultFromField
(
..
),
SqlJsonb
,
fromPGSFromField
)
...
...
@@ -100,8 +100,8 @@ instance (Serialise s, Serialise p) => Serialise (Archive s p)
type
NodeListStory
=
NodeStory
NgramsState'
NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type
NgramsState'
=
Map
Table
Ngrams
.
NgramsType
NgramsTableMap
type
NgramsStatePatch'
=
PatchMap
Table
Ngrams
.
NgramsType
NgramsTablePatch
type
NgramsState'
=
Map
Ngrams
.
NgramsType
NgramsTableMap
type
NgramsStatePatch'
=
PatchMap
Ngrams
.
NgramsType
NgramsTablePatch
-- instance Serialise NgramsStatePatch'
instance
FromField
(
Archive
NgramsState'
NgramsStatePatch'
)
where
...
...
@@ -167,7 +167,7 @@ initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
archive
=
Archive
{
_a_version
=
0
,
_a_state
=
ngramsTableMap
,
_a_history
=
[]
}
ngramsTableMap
=
Map
.
singleton
Table
Ngrams
.
NgramsTerms
ngramsTableMap
=
Map
.
singleton
Ngrams
.
NgramsTerms
$
Map
.
fromList
[
(
n
^.
ne_ngrams
,
ngramsElementToRepo
n
)
|
n
<-
mockTable
^.
_NgramsTable
...
...
@@ -231,8 +231,8 @@ class HasNodeArchiveStoryImmediateSaver env where
type
ArchiveStateList
=
[(
Table
Ngrams
.
NgramsType
,
NgramsTerm
,
NgramsRepoElement
)]
type
ArchiveStateSet
=
Set
.
Set
(
Table
Ngrams
.
NgramsType
,
NgramsTerm
)
type
ArchiveStateList
=
[(
Ngrams
.
NgramsType
,
NgramsTerm
,
NgramsRepoElement
)]
type
ArchiveStateSet
=
Set
.
Set
(
Ngrams
.
NgramsType
,
NgramsTerm
)
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
72f5f35c
...
...
@@ -46,11 +46,11 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import
Gargantext.Core.Text.Corpus.Parsers.RIS
qualified
as
RIS
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Core.Text.Corpus.Parsers.WOS
qualified
as
WOS
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Database.Query.Table.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
hiding
(
show
,
undefined
)
import
Gargantext.Utils.Zip
qualified
as
UZip
import
Protolude
import
Protolude
(
show
)
import
System.FilePath
(
takeExtension
)
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/List.hs
View file @
72f5f35c
...
...
@@ -9,14 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.Core.Text.List
where
import
Control.Lens
hiding
(
both
)
-- ((^.), view, over, set, (_1), (_2))
import
Control.Lens
(
view
,
over
)
-- ((^.), view, over, set, (_1), (_2))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.HashSet
(
HashSet
)
...
...
@@ -27,26 +26,27 @@ import Data.Set qualified as Set
import
Data.Tuple.Extra
(
both
)
import
Gargantext.API.Ngrams.Types
(
NgramsElement
,
NgramsTerm
(
..
))
import
Gargantext.Core.NLP
(
HasNLPServer
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.NodeStory
.Types
(
HasNodeStory
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Group
import
Gargantext.Core.Text.List.Group
(
toGroupedTree
,
setScoresWithMap
)
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithStem
import
Gargantext.Core.Text.List.Social
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
,
flowSocialList
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowListScores
,
FlowCont
(
FlowCont
),
flc_scores
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
scored_speExc
,
scored_genInc
,
normalizeGlobal
,
normalizeLocal
,
scored_terms
)
import
Gargantext.Core.T
ypes
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
ContextId
)
import
Gargantext.Core.T
ext.Ngrams
(
NgramsType
(
..
),
Ngrams
(
..
)
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
)
)
import
Gargantext.Data.HashMap.Strict.Utils
qualified
as
HashMap
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf_withSample
)
import
Gargantext.Database.Admin.Types.Node
(
MasterCorpusId
,
UserCorpusId
,
ContextId
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
selectLems
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
),
Ngrams
(
..
)
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Prelude
{-
...
...
@@ -81,8 +81,8 @@ buildNgramsLists user uCid mCid mfslw gp = do
pure
$
Map
.
unions
$
[
ngTerms
]
<>
othersTerms
data
MapListSize
=
MapListSize
{
unMapListSize
::
!
Int
}
data
MaxListSize
=
MaxListSize
{
unMaxListSize
::
!
Int
}
newtype
MapListSize
=
MapListSize
{
unMapListSize
::
Int
}
newtype
MaxListSize
=
MaxListSize
{
unMaxListSize
::
Int
}
buildNgramsOthersList
::
(
HasNodeError
err
,
HasNLPServer
env
...
...
@@ -103,7 +103,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize,
<-
flowSocialList
mfslw
user
nt
(
FlowCont
HashMap
.
empty
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
]
)
(
repeat
mempty
)
)
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
...
...
@@ -113,29 +113,36 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize,
(
mapTerms
,
tailTerms'
)
=
HashMap
.
partition
((
==
Just
MapTerm
)
.
viewListType
)
tailTerms
listSize
=
mapListSize
-
(
List
.
length
mapTerms
)
listSize
=
mapListSize
-
List
.
length
mapTerms
(
mapTerms'
,
candiTerms
)
=
both
HashMap
.
fromList
$
List
.
splitAt
listSize
$
List
.
take
maxListSize
$
List
.
sortOn
(
Down
.
viewScore
.
snd
)
$
HashMap
.
toList
tailTerms'
pure
$
Map
.
fromList
[(
nt
,
List
.
take
maxListSize
$
(
toNgramsElement
stopTerms
)
<>
(
toNgramsElement
mapTerms
)
<>
(
toNgramsElement
$
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
(
toNgramsElement
$
setListType
(
Just
CandidateTerm
)
candiTerms
)
pure
$
Map
.
fromList
[(
nt
,
List
.
take
maxListSize
$
toNgramsElement
stopTerms
<>
toNgramsElement
mapTerms
<>
toNgramsElement
(
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
toNgramsElement
(
setListType
(
Just
CandidateTerm
)
candiTerms
)
)]
-- | https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169#note_10049
-- Stemming can be useful if you do not have any context: ok for full text search then.
--
-- In document, we have context so we can add grammar and linguistics
-- rules to be more precise than the stemmatization, that is why the
-- lemmatization is used here to group. Basically it will avoid
-- grouping homonyms in list. In search usually you add more context
-- to "control" the stemmatization approximation.
getGroupParams
::
(
HasNodeError
err
,
HasTreeError
err
)
=>
GroupParams
->
HashSet
Ngrams
->
DBCmd
err
GroupParams
getGroupParams
gp
@
(
GroupWithPosTag
l
nsc
_m
)
ng
=
do
!
hashMap
<-
HashMap
.
fromList
<$>
selectLems
l
nsc
(
HashSet
.
toList
ng
)
getGroupParams
gp
@
(
GroupWithPosTag
{
..
}
)
ng
=
do
!
hashMap
<-
HashMap
.
fromList
<$>
selectLems
_gwl_lang
_gwl_nlp_config
(
HashSet
.
toList
ng
)
-- printDebug "hashMap" hashMap
pure
$
over
gwl_map
(
\
x
->
x
<>
hashMap
)
gp
pure
$
over
gwl_map
(
<>
hashMap
)
gp
getGroupParams
gp
_
=
pure
gp
...
...
@@ -167,8 +174,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!
(
socialLists
::
FlowCont
NgramsTerm
FlowListScores
)
<-
flowSocialList
mfslw
user
nt
(
FlowCont
HashMap
.
empty
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
]
)
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
repeat
mempty
)
)
-- printDebug "[buildNgramsTermsList: Flow Social List / end]" nt
...
...
@@ -181,17 +188,17 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!
groupParams'
<-
getGroupParams
groupParams
(
HashSet
.
map
(
text2ngrams
.
unNgramsTerm
)
ngramsKeys
)
-- printDebug "[buildNgramsTermsList: groupParams']" ("" :: Text)
let
!
socialLists_Stemmed
=
addScoreStem
groupParams'
ngramsKeys
socialLists
!
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
!
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
HashMap
.
filter
(
\
g
->
(
view
gts'_score
g
)
>
1
)
$
HashMap
.
filter
(
\
g
->
view
gts'_score
g
>
1
)
$
view
flc_scores
groupedWithList
!
(
groupedMono
,
groupedMult
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
candidateTerms
-- void $ panicTrace $ "groupedWithList: " <> show groupedWithList
-- printDebug "[buildNgramsTermsList] socialLists" socialLists
-- printDebug "[buildNgramsTermsList] socialLists with scores" socialLists_Stemmed
-- printDebug "[buildNgramsTermsList] groupedWithList" groupedWithList
...
...
@@ -269,8 +276,8 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!
(
monoScored
,
multScored
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
groupedTreeScores_SpeGen
-- filter with max score
partitionWithMaxScore
=
HashMap
.
partition
(
\
g
->
(
view
scored_genInc
$
view
gts'_score
g
)
>
(
view
scored_speExc
$
view
gts'_score
g
)
partitionWithMaxScore
=
HashMap
.
partition
(
\
g
->
view
scored_genInc
(
view
gts'_score
g
)
>
view
scored_speExc
(
view
gts'_score
g
)
)
!
(
monoScoredIncl
,
monoScoredExcl
)
=
partitionWithMaxScore
monoScored
...
...
@@ -285,25 +292,25 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, MapListSize mapListSi
!
inclSize
=
0.4
::
Double
!
exclSize
=
1
-
inclSize
splitAt''
max'
n'
=
(
both
(
HashMap
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
max'
)
)
sortOn'
f
=
(
List
.
sortOn
(
Down
.
(
view
(
gts'_score
.
f
))
.
snd
)
)
.
HashMap
.
toList
splitAt''
max'
n'
=
both
HashMap
.
fromList
.
List
.
splitAt
(
round
$
n'
*
max'
)
sortOn'
f
=
List
.
sortOn
(
Down
.
view
(
gts'_score
.
f
)
.
snd
)
.
HashMap
.
toList
monoInc_size
n
=
splitAt''
n
$
monoSize
*
inclSize
/
2
multExc_size
n
=
splitAt''
n
$
multSize
*
exclSize
/
2
!
(
mapMonoScoredInclHead
,
monoScoredInclTail
)
=
monoInc_size
mapSize
$
(
sortOn'
scored_genInc
)
monoScoredIncl
!
(
mapMonoScoredExclHead
,
monoScoredExclTail
)
=
monoInc_size
mapSize
$
(
sortOn'
scored_speExc
)
monoScoredExcl
!
(
mapMonoScoredInclHead
,
monoScoredInclTail
)
=
monoInc_size
mapSize
$
sortOn'
scored_genInc
monoScoredIncl
!
(
mapMonoScoredExclHead
,
monoScoredExclTail
)
=
monoInc_size
mapSize
$
sortOn'
scored_speExc
monoScoredExcl
!
(
mapMultScoredInclHead
,
multScoredInclTail
)
=
multExc_size
mapSize
$
(
sortOn'
scored_genInc
)
multScoredIncl
!
(
mapMultScoredExclHead
,
multScoredExclTail
)
=
multExc_size
mapSize
$
(
sortOn'
scored_speExc
)
multScoredExcl
!
(
mapMultScoredInclHead
,
multScoredInclTail
)
=
multExc_size
mapSize
$
sortOn'
scored_genInc
multScoredIncl
!
(
mapMultScoredExclHead
,
multScoredExclTail
)
=
multExc_size
mapSize
$
sortOn'
scored_speExc
multScoredExcl
!
(
canMonoScoredIncHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn'
scored_genInc
)
monoScoredInclTail
!
(
canMonoScoredExclHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn'
scored_speExc
)
monoScoredExclTail
!
(
canMonoScoredIncHead
,
_
)
=
monoInc_size
canSize
$
sortOn'
scored_genInc
monoScoredInclTail
!
(
canMonoScoredExclHead
,
_
)
=
monoInc_size
canSize
$
sortOn'
scored_speExc
monoScoredExclTail
!
(
canMulScoredInclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn'
scored_genInc
)
multScoredInclTail
!
(
canMultScoredExclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn'
scored_speExc
)
multScoredExclTail
!
(
canMulScoredInclHead
,
_
)
=
multExc_size
canSize
$
sortOn'
scored_genInc
multScoredInclTail
!
(
canMultScoredExclHead
,
_
)
=
multExc_size
canSize
$
sortOn'
scored_speExc
multScoredExclTail
------------------------------------------------------------
-- Final Step building the Typed list
...
...
src/Gargantext/Core/Text/List/Group.hs
View file @
72f5f35c
...
...
@@ -9,11 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module
Gargantext.Core.Text.List.Group
where
...
...
@@ -23,8 +20,8 @@ import Data.HashMap.Strict (HashMap)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithScores
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.Text.List.Group.WithScores
(
groupWithScores'
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowListScores
,
FlowCont
)
import
Gargantext.Prelude
------------------------------------------------------------------------
toGroupedTree
::
(
Ord
a
,
Monoid
a
,
HasSize
a
)
...
...
@@ -43,9 +40,7 @@ setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
->
HashMap
NgramsTerm
(
GroupedTreeScores
b
)
setScoresWithMap
m
=
setScoresWith
(
score
m
)
where
score
m'
t
=
case
HashMap
.
lookup
t
m'
of
Nothing
->
mempty
Just
r
->
r
score
m'
t
=
fromMaybe
mempty
(
HashMap
.
lookup
t
m'
)
setScoresWith
::
(
Ord
a
,
Ord
b
)
=>
(
NgramsTerm
->
b
)
...
...
@@ -58,8 +53,7 @@ setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f)
)
-}
setScoresWith
f
=
HashMap
.
mapWithKey
(
\
k
v
->
v
{
_gts'_score
=
f
k
,
_gts'_children
=
setScoresWith
f
$
view
gts'_children
v
}
)
,
_gts'_children
=
setScoresWith
f
$
view
gts'_children
v
}
)
------------------------------------------------------------------------
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
72f5f35c
...
...
@@ -25,11 +25,11 @@ import Data.HashSet qualified as Set
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Text
qualified
as
Text
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
(
toNgramsPatch
,
NgramsPatch
,
NgramsTerm
(
..
)
)
import
Gargantext.Core
(
Lang
(
..
),
Form
,
Lem
,
NLPServerConfig
)
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Social.Patch
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.Text.List.Group.Prelude
(
Stem
)
import
Gargantext.Core.Text.List.Social.Patch
(
addScorePatch
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowCont
,
FlowListScores
)
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Prelude
...
...
@@ -43,8 +43,8 @@ addScoreStem groupParams ngrams fl = foldl' addScorePatch fl
------------------------------------------------------------------------
-- | Main Types
data
StopSize
=
StopSize
{
unStopSize
::
!
Int
}
deriving
(
Eq
)
newtype
StopSize
=
StopSize
{
unStopSize
::
Int
}
deriving
(
Eq
,
Show
)
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
...
...
@@ -61,7 +61,7 @@ data GroupParams = GroupParams { unGroupParams_lang :: !Lang
,
_gwl_nlp_config
::
!
NLPServerConfig
,
_gwl_map
::
!
(
HashMap
Form
Lem
)
}
deriving
(
Eq
)
deriving
(
Eq
,
Show
)
------------------------------------------------------------------------
groupWith
::
GroupParams
...
...
@@ -80,7 +80,6 @@ groupWith (GroupParams { unGroupParams_lang = l }) t =
$
Text
.
splitOn
" "
$
Text
.
replace
"-"
" "
$
unNgramsTerm
t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith
(
GroupWithPosTag
{
_gwl_map
=
m
})
t
=
case
HashMap
.
lookup
(
unNgramsTerm
t
)
m
of
...
...
src/Gargantext/Core/Text/List/Social.hs
View file @
72f5f35c
...
...
@@ -18,24 +18,23 @@ import Data.Aeson
import
Data.HashMap.Strict
(
HashMap
)
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Pool
import
Data.Swagger
import
Data.Pool
(
withResource
)
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
,
defaultSchemaOptions
)
import
Data.Text
qualified
as
T
import
Data.Vector
qualified
as
V
import
GHC.Generics
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
,
NgramsPatch
)
import
Gargantext.Core.NodeStory
(
getNodesArchiveHistory
)
import
Gargantext.Core.NodeStory
.DB
(
getNodesArchiveHistory
)
import
Gargantext.Core.Text.List.Social.Find
(
findListsId
)
import
Gargantext.Core.Text.List.Social.Patch
(
addScorePatches
)
import
Gargantext.Core.Text.List.Social.Prelude
(
FlowCont
,
FlowListScores
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Core.Types.Individu
(
User
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
)
import
Gargantext.Database.Prelude
(
DBCmd
,
connPool
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Tree
(
NodeMode
(
Private
),
HasTreeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Prelude
import
Test.QuickCheck
import
Test.QuickCheck
(
Arbitrary
(
arbitrary
),
oneof
,
arbitraryBoundedEnum
)
import
Web.Internal.HttpApiData
(
ToHttpApiData
,
FromHttpApiData
,
parseUrlPiece
,
toUrlPiece
)
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/List/Social/Patch.hs
View file @
72f5f35c
...
...
@@ -14,14 +14,13 @@ module Gargantext.Core.Text.List.Social.Patch
import
Control.Lens
hiding
(
cons
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Patch.Class
qualified
as
Patch
(
Replace
(
..
))
import
Gargantext.API.Ngrams.Prelude
(
unMSet
,
patchMSet_toList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
,
nre_children
,
nre_list
,
MSet
,
NgramsPatch
(
..
)
)
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.T
ypes
(
ListId
)
import
Gargantext.Database.
Schema.Ngrams
(
NgramsType
(
..
)
)
import
Gargantext.Core.T
ext.Ngrams
(
NgramsType
(
..
)
)
import
Gargantext.Database.
Admin.Types.Node
(
ListId
)
import
Gargantext.Prelude
addScorePatches
::
NgramsType
->
[
ListId
]
...
...
@@ -40,7 +39,7 @@ addScorePatchesList :: NgramsType
addScorePatchesList
nt
repo
fl
lid
=
foldl'
addScorePatch
fl
patches
where
patches
=
maybe
[]
(
List
.
concat
.
(
map
HashMap
.
toList
)
)
patches'
patches
=
maybe
[]
(
concatMap
HashMap
.
toList
)
patches'
patches'
=
do
lists
<-
Map
.
lookup
lid
repo
...
...
src/Gargantext/Core/Text/Ngrams.hs
0 → 100644
View file @
72f5f35c
{-|
Module : Gargantext.Core.Text.Ngrams
Description : Main Ngrams types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Text.Ngrams
where
import
Codec.Serialise
(
Serialise
())
import
Control.Lens
(
over
)
import
Data.Aeson
(
ToJSON
(
..
),
FromJSON
(
..
),
FromJSONKey
(
..
),
FromJSONKeyFunction
(
..
),
Value
(
String
),
ToJSONKey
(
..
)
)
import
Data.Aeson.Types
(
toJSONKeyText
)
import
Data.Text
(
pack
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Gargantext.Core.Types
(
TODO
(
..
))
import
Gargantext.Database.Schema.Prelude
hiding
(
over
)
import
Gargantext.Prelude
import
Servant
(
FromHttpApiData
(
..
),
ToHttpApiData
(
..
))
import
Test.QuickCheck
(
elements
)
import
Text.Read
(
read
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data
NgramsType
=
Authors
|
Institutes
|
Sources
|
NgramsTerms
deriving
(
Eq
,
Show
,
Read
,
Ord
,
Enum
,
Bounded
,
Generic
)
instance
Serialise
NgramsType
instance
FromJSON
NgramsType
where
parseJSON
(
String
"Authors"
)
=
pure
Authors
parseJSON
(
String
"Institutes"
)
=
pure
Institutes
parseJSON
(
String
"Sources"
)
=
pure
Sources
parseJSON
(
String
"Terms"
)
=
pure
NgramsTerms
parseJSON
(
String
"NgramsTerms"
)
=
pure
NgramsTerms
parseJSON
_
=
mzero
instance
FromJSONKey
NgramsType
where
fromJSONKey
=
FromJSONKeyTextParser
(
parseJSON
.
String
)
instance
ToJSON
NgramsType
where
toJSON
Authors
=
String
"Authors"
toJSON
Institutes
=
String
"Institutes"
toJSON
Sources
=
String
"Sources"
toJSON
NgramsTerms
=
String
"Terms"
instance
ToJSONKey
NgramsType
where
toJSONKey
=
toJSONKeyText
(
pack
.
show
)
instance
FromHttpApiData
NgramsType
where
parseUrlPiece
n
=
pure
$
(
read
.
cs
)
n
instance
ToHttpApiData
NgramsType
where
toUrlPiece
=
pack
.
show
instance
ToParamSchema
NgramsType
where
toParamSchema
_
=
toParamSchema
(
Proxy
::
Proxy
TODO
)
instance
Arbitrary
NgramsType
where
arbitrary
=
elements
[
minBound
..
maxBound
]
ngramsTypes
::
[
NgramsType
]
ngramsTypes
=
[
minBound
..
]
instance
ToSchema
NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
data
Ngrams
=
UnsafeNgrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
instance
Hashable
Ngrams
makeLenses
''
N
grams
instance
PGS
.
ToRow
Ngrams
where
toRow
(
UnsafeNgrams
t
s
)
=
[
toField
t
,
toField
s
]
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- Named entity are typed ngrams of Terms Ngrams
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
makeLenses
''
N
gramsT
instance
Functor
NgramsT
where
fmap
=
over
ngramsT
src/Gargantext/Core/Text/Terms.hs
View file @
72f5f35c
...
...
@@ -37,27 +37,27 @@ compute graph
module
Gargantext.Core.Text.Terms
where
import
Control.Lens
import
Control.Lens
(
(
^.
),
view
,
over
,
makeLenses
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
Text
import
Data.Traversable
import
GHC.Base
(
String
)
import
Gargantext.Core
import
Gargantext.Core
(
Lang
,
NLPServerConfig
,
PosTagAlgo
)
import
Gargantext.Core.Text
(
sentences
,
HasText
(
..
))
import
Gargantext.Core.Text.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
),
ngramsTerms
)
import
Gargantext.Core.Text.Terms.Eleve
(
mainEleveWith
,
Tries
,
Token
,
buildTries
,
toToken
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTerms
)
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Text.Terms.Mono.Token.En
(
tokenize
)
import
Gargantext.Core.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types
(
TermsCount
,
POS
,
Terms
(
Terms
),
TermsWithCount
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
(
..
),
insertNgramsPostag
,
np_form
,
np_lem
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
),
ngramsTerms
,
text2ngrams
,
NgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
,
NgramsId
)
import
Gargantext.Prelude
data
TermType
lang
...
...
@@ -103,8 +103,7 @@ withLang (Unsupervised {..}) ns = Unsupervised { _tt_model = m', .. }
$
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
$
map
hasText
ns
$
concatMap
hasText
ns
just_m
->
just_m
withLang
l
_
=
l
...
...
@@ -126,7 +125,11 @@ class ExtractNgramsT h
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
l
pa
po
(
Terms
ng1
ng2
)
=
NgramsPostag
l
pa
po
form
lem
NgramsPostag
{
_np_lang
=
l
,
_np_algo
=
pa
,
_np_postag
=
po
,
_np_form
=
form
,
_np_lem
=
lem
}
where
form
=
text2ngrams
$
Text
.
intercalate
" "
ng1
lem
=
text2ngrams
$
Text
.
intercalate
" "
$
Set
.
toList
ng2
...
...
@@ -138,7 +141,7 @@ cleanNgrams s ng
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
))
cleanExtractedNgrams
::
Int
->
ExtractedNgrams
->
ExtractedNgrams
cleanExtractedNgrams
s
(
SimpleNgrams
ng
)
=
SimpleNgrams
$
(
cleanNgrams
s
)
ng
cleanExtractedNgrams
s
(
SimpleNgrams
ng
)
=
SimpleNgrams
$
cleanNgrams
s
ng
cleanExtractedNgrams
s
(
EnrichedNgrams
ng
)
=
EnrichedNgrams
$
over
np_form
(
cleanNgrams
s
)
$
over
np_lem
(
cleanNgrams
s
)
ng
...
...
@@ -156,8 +159,7 @@ insertExtractedNgrams ngs = do
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
--printDebug "terms" m2
let
result
=
HashMap
.
union
m1
m2
pure
result
pure
$
HashMap
.
union
m1
m2
isSimpleNgrams
::
ExtractedNgrams
->
Bool
isSimpleNgrams
(
SimpleNgrams
_
)
=
True
...
...
@@ -189,10 +191,10 @@ type MinNgramSize = Int
termsUnsupervised
::
TermType
Lang
->
Text
->
[
TermsWithCount
]
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Nothing
})
=
panicTrace
"[termsUnsupervised] no model"
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
_tt_model
,
..
})
=
map
(
\
(
t
,
cnt
)
->
(
text2term
_tt_lang
t
,
cnt
))
map
(
first
(
text2term
_tt_lang
))
.
groupWithCounts
-- . List.nub
.
(
List
.
filter
(
\
l'
->
List
.
length
l'
>=
_tt_windowSize
)
)
.
List
.
filter
(
\
l'
->
List
.
length
l'
>=
_tt_windowSize
)
.
List
.
concat
.
mainEleveWith
_tt_model
_tt_ngramsSize
.
uniText
...
...
@@ -200,19 +202,18 @@ termsUnsupervised _ = undefined
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
newTries
n
t
=
buildTries
n
(
toToken
<$>
uniText
t
)
-- | TODO removing long terms > 24
uniText
::
Text
->
[[
Text
]]
uniText
=
map
(
List
.
filter
(
not
.
isPunctuation
))
.
map
tokenize
.
sentences
-- TODO get sentences according to lang
.
Text
.
toLower
uniText
=
map
(
List
.
filter
(
not
.
isPunctuation
)
.
tokenize
)
.
sentences
-- TODO get sentences according to lang
.
Text
.
toLower
text2term
::
Lang
->
[
Text
]
->
Terms
text2term
_
[]
=
Terms
[]
Set
.
empty
text2term
lang
txt
=
Terms
txt
(
Set
.
fromList
$
map
(
stem
lang
PorterAlgorithm
)
txt
)
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
isPunctuation
x
=
List
.
elem
x
$
Text
.
pack
.
pure
<$>
(
"!?(),;.:"
::
String
)
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
72f5f35c
...
...
@@ -11,25 +11,21 @@ Multi-terms are ngrams where n > 1.
-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Core.Text.Terms.Multi
(
multiterms
,
multiterms_rake
,
tokenTagsWith
,
tokenTags
,
cleanTextForNLP
)
where
import
Control.Applicative
import
Data.Attoparsec.Text
as
DAT
import
Data.Text
hiding
(
map
,
group
,
filter
,
concat
)
import
Data.Attoparsec.Text
as
DAT
(
digit
,
space
,
notChar
,
string
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
(
..
),
PosTagAlgo
(
..
))
import
Gargantext.Core.Text.Terms.Multi.Lang.En
qualified
as
En
import
Gargantext.Core.Text.Terms.Multi.Lang.Fr
qualified
as
Fr
import
Gargantext.Core.Text.Terms.Multi.PosTagging
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import
Gargantext.Core.Text.Terms.Multi.PosTagging
(
corenlp
,
tokens2tokensTags
)
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
(
PosSentences
(
_sentences
),
Sentence
(
_sentenceTokens
)
)
import
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types
(
POS
(
NP
),
Terms
(
Terms
),
TermsWithCount
,
TokenTag
(
TokenTag
,
_my_token_pos
)
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Prelude
import
Gargantext.Utils.SpacyNLP
qualified
as
SpacyNLP
import
Replace.Attoparsec.Text
as
RAT
import
Replace.Attoparsec.Text
as
RAT
(
streamEdit
)
-------------------------------------------------------------------
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
...
...
src/Gargantext/Core/Text/Terms/Multi/Group.hs
View file @
72f5f35c
...
...
@@ -16,7 +16,7 @@ group the tokens into extracted terms.
module
Gargantext.Core.Text.Terms.Multi.Group
(
group2
)
where
import
Gargantext.Core.Types
import
Gargantext.Core.Types
(
POS
,
TokenTag
(
TokenTag
)
)
import
Gargantext.Prelude
-- | FIXME p1 and p2 not really taken into account
...
...
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
View file @
72f5f35c
...
...
@@ -17,8 +17,8 @@ module Gargantext.Core.Text.Terms.Multi.Lang.En (groupTokens)
where
import
Gargantext.Prelude
import
Gargantext.Core.Types
import
Gargantext.Core.Text.Terms.Multi.Group
import
Gargantext.Core.Types
(
POS
(
CC
,
IN
,
DT
,
NP
,
JJ
),
TokenTag
)
import
Gargantext.Core.Text.Terms.Multi.Group
(
group2
)
------------------------------------------------------------------------
-- | Rule grammar to group tokens
...
...
@@ -31,8 +31,7 @@ groupTokens ntags = group2 NP NP
-- $ group2 VB NP
$
group2
JJ
NP
$
group2
JJ
JJ
$
group2
JJ
CC
$
ntags
$
group2
JJ
CC
ntags
------------------------------------------------------------------------
--groupNgrams ((x,_,"PERSON"):(y,yy,"PERSON"):xs) = groupNgrams ((x <> " " <> y,yy,"PERSON"):xs)
...
...
src/Gargantext/Core/Viz/Chart.hs
View file @
72f5f35c
...
...
@@ -9,32 +9,31 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Viz.Chart
where
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
(
toList
)
import
Data.Set
qualified
as
Set
import
Data.Vector
qualified
as
V
import
Gargantext.API.Ngrams.NgramsTree
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.API.Ngrams.NgramsTree
(
toTree
,
NgramsTree
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
getListNgrams
,
getRepo
,
mapTermListRoot
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
NgramsTerm
)
)
import
Gargantext.Core.NodeStory
.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Core.Types
import
Gargantext.Core.Viz.Types
import
Gargantext.Database.Action.Metrics.NgramsByContext
import
Gargantext.Database.Admin.Config
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Core.Types.Main
(
ListType
)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
NodeList
),
CorpusId
,
contextId2NodeId
)
import
Gargantext.Core.Viz.Types
(
Histo
(
Histo
)
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
countContextsByNgramsWith
,
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.Node.Select
import
Gargantext.Database.Query.Table.Node
(
getListsWithParentId
)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocsDates
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_id
)
)
import
Gargantext.Prelude
hiding
(
toList
)
import
qualified
Data.Set
as
Set
histoData
::
CorpusId
->
DBCmd
err
Histo
...
...
src/Gargantext/Core/Viz/Graph.hs
View file @
72f5f35c
...
...
@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Viz.Graph
where
...
...
@@ -19,9 +17,9 @@ import Data.ByteString.Lazy as DBL (readFile, writeFile)
import
Data.HashMap.Strict
(
HashMap
,
lookup
)
import
Data.Text
qualified
as
Text
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
),
NgramsRepoElement
(
..
),
mSetToList
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Viz.Graph.Types
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Text.Read
qualified
as
Text
...
...
src/Gargantext/Core/Viz/Graph/API.hs
View file @
72f5f35c
...
...
@@ -19,31 +19,31 @@ module Gargantext.Core.Viz.Graph.API
where
import
Control.Lens
(
set
,
(
^.
),
_Just
,
(
^?
),
at
)
import
Data.Aeson
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Swagger
import
Data.Swagger
(
ToSchema
)
import
Gargantext.API.Admin.EnvTypes
(
GargJob
(
..
),
Env
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Errors.Types
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
)
import
Gargantext.API.Errors.Types
(
BackendInternalError
)
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Prelude
import
Gargantext.API.Prelude
(
GargM
,
GargServer
)
import
Gargantext.Core.Methods.Similarities
(
Similarity
(
..
),
GraphMetric
(
..
),
withMetric
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Types.Main
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
,
a_version
,
unNodeStory
,
NodeListStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Core.Viz.Graph.GEXF
()
import
Gargantext.Core.Viz.Graph.Tools
-- (cooc2graph)
import
Gargantext.Core.Viz.Graph.Types
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Admin.Config
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.Node
(
getOrMkList
,
getNodeWith
,
defaultList
,
getClosestParentIdByType
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.Select
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
(
node_hyperdata
,
node_name
)
import
Gargantext.Prelude
import
Gargantext.Utils.Jobs
(
serveJobsAPI
,
MonadJobStatus
(
..
))
import
Servant
...
...
src/Gargantext/Core/Viz/Graph/Tools.hs
View file @
72f5f35c
...
...
@@ -11,39 +11,40 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ScopedTypeVariables #-}
module
Gargantext.Core.Viz.Graph.Tools
where
import
Data.Aeson
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.HashSet
qualified
as
HashSet
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Set
qualified
as
Set
import
Data.Swagger
hiding
(
items
)
import
Data.Swagger
(
ToSchema
)
import
Data.Text
qualified
as
Text
import
Data.Vector.Storable
qualified
as
Vec
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core.Methods.Similarities
(
Similarity
(
..
),
measure
)
import
Gargantext.Core.Statistics
import
Gargantext.Core.Statistics
(
pcaReduceTo
,
Dimension
(
Dimension
)
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Viz.Graph.Bridgeness
(
bridgeness
,
Bridgeness
(
..
),
Partitions
,
nodeId2comId
,
{-recursiveClustering,-}
recursiveClustering'
,
setNodes2clusterNodes
)
import
Gargantext.Core.Viz.Graph.Index
(
createIndices
,
toIndex
,
map2mat
,
mat2map
,
Index
,
MatrixShape
(
..
))
import
Gargantext.Core.Viz.Graph.Tools.IGraph
(
mkGraphUfromEdges
,
spinglass
,
spinglass'
)
import
Gargantext.Core.Viz.Graph.Tools.Infomap
(
infomap
)
import
Gargantext.Core.Viz.Graph.Types
(
Attributes
(
..
),
Edge
(
..
),
Graph
(
..
),
MultiPartite
(
..
),
Node
(
..
),
Partite
(
..
),
Strength
(
..
))
import
Gargantext.Core.Viz.Graph.Utils
(
edgesFilter
,
nodesFilter
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Graph.BAC.ProxemyOptim
qualified
as
BAC
import
Graph.Types
(
ClusterNode
)
import
IGraph
qualified
as
Igraph
import
IGraph.Algorithms.Layout
qualified
as
Layout
import
IGraph.Random
-- (Gen(..))
import
IGraph.Random
(
Gen
)
-- (Gen(..))
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Test.QuickCheck.Arbitrary
(
Arbitrary
(
arbitrary
)
)
data
PartitionMethod
=
Spinglass
|
Confluence
|
Infomap
deriving
(
Generic
,
Eq
,
Ord
,
Enum
,
Bounded
,
Show
)
...
...
src/Gargantext/Core/Viz/Graph/Types.hs
View file @
72f5f35c
...
...
@@ -24,12 +24,11 @@ import Data.Text (pack)
import
Database.PostgreSQL.Simple.FromField
(
FromField
(
..
))
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
)
import
Gargantext.Core.Methods.Similarities
(
GraphMetric
)
import
Gargantext.Core.T
ypes
(
ListId
)
import
Gargantext.Core.T
ext.Ngrams
(
NgramsType
(
..
)
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
Hyperdata
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
NodeId
)
import
Gargantext.Database.Prelude
(
fromField'
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Opaleye
(
DefaultFromField
(
..
),
SqlJsonb
,
fromPGSFromField
)
import
Test.QuickCheck
(
elements
)
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
72f5f35c
...
...
@@ -11,18 +11,17 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module
Gargantext.Core.Viz.Phylo.API.Tools
where
import
Control.Lens
hiding
(
Context
)
import
Control.Lens
(
to
,
view
)
import
Data.Aeson
(
Value
,
decodeFileStrict
,
encode
,
eitherDecodeFileStrict'
)
import
Data.ByteString.Lazy
qualified
as
Lazy
import
Data.Map.Strict
qualified
as
Map
import
Data.Proxy
import
Data.Set
qualified
as
Set
import
Data.Text
(
pack
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
...
...
@@ -30,31 +29,31 @@ import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
import
Gargantext.API.Ngrams.Prelude
(
getTermList
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
(
withDefaultLanguage
,
Lang
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
termsInText
)
import
Gargantext.Core.Types
(
Context
,
nodeId2ContextId
)
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
))
import
Gargantext.Core.Viz.Phylo
(
TimeUnit
(
..
),
Date
,
Document
(
..
),
PhyloConfig
(
..
),
Phylo
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
{-printIOMsg, printIOComment,-}
setConfig
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataPhylo
(
..
),
HyperdataCorpus
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata
.Corpus
(
HyperdataCorpus
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
ContextId
,
PhyloId
)
import
Gargantext.Database.Admin.Types.Hyperdata.Phylo
(
HyperdataPhylo
(
..
)
)
import
Gargantext.Database.Admin.Types.Node
(
Context
,
CorpusId
,
ContextId
,
PhyloId
,
nodeId2ContextId
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Context
(
ContextPoly
(
_context_hyperdata
,
_context_id
)
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
_node_hyperdata
),
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.System.Logging
import
Gargantext.System.Logging
(
MonadLogger
,
LogLevel
(
DEBUG
),
logLocM
)
import
Gargantext.Utils.UTCTime
(
timeMeasured
)
import
Prelude
qualified
import
System.FilePath
((
</>
))
import
System.IO.Temp
(
withTempDirectory
)
import
System.Process
qualified
as
Shell
import
Gargantext.Utils.UTCTime
(
timeMeasured
)
--------------------------------------------------------------------
getPhyloData
::
HasNodeError
err
...
...
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
72f5f35c
...
...
@@ -13,30 +13,30 @@ Portability : POSIX
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
where
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
(
to
,
view
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Proxy
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
Text
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
(
HasDBid
,
withDefaultLanguage
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.NodeStory
.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Types
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms.WithList
(
buildPatterns
,
termsInText
,
Patterns
)
import
Gargantext.Core.Types.Main
(
ListType
(
MapTerm
)
)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
,
CorpusId
)
import
Gargantext.Core.Viz.LegacyPhylo
hiding
(
Svg
,
Dot
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
_hd_abstract
,
_hd_publication_year
)
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocs
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
type
MinSizeBranch
=
Int
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
72f5f35c
...
...
@@ -51,7 +51,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import
Conduit
import
Control.Lens
hiding
(
elements
,
Indexed
)
import
Control.Lens
(
(
^.
),
to
,
view
,
over
)
import
Data.Bifunctor
qualified
as
B
import
Data.Conduit
qualified
as
C
import
Data.Conduit.Internal
(
zipSources
)
...
...
@@ -60,56 +60,57 @@ import Data.Conduit.List qualified as CList
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Proxy
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
T
import
EPO.API.Client.Types
qualified
as
EPO
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
)
import
Gargantext.Core
(
withDefaultLanguage
)
import
Gargantext.Core
.Text.Ngrams
(
NgramsType
(
NgramsTerms
),
Ngrams
(
_ngramsTerms
)
)
import
Gargantext.Core
(
Lang
(
..
),
NLPServerConfig
,
withDefaultLanguage
)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.NLP
(
HasNLPServer
,
nlpServerGet
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.NodeStory
.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Corpus.API
qualified
as
API
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
,
FileType
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Group.WithStem
(
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
))
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
import
Gargantext.Core.Types.Main
(
CorpusName
,
ListType
(
MapTerm
)
)
import
Gargantext.Core.Types.Query
(
Limit
)
import
Gargantext.Database.Action.Flow.Extract
()
-- ExtractNgramsT instances
import
Gargantext.Database.Action.Flow.List
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
,
toNodeNgramsW'
)
import
Gargantext.Database.Action.Flow.Types
(
do_api
,
DataOrigin
(
..
),
DataText
(
..
),
FlowCorpus
)
import
Gargantext.Database.Action.Flow.Utils
(
docNgrams
,
documentIdWithNgrams
,
insertDocNgrams
,
insertDocs
,
mapNodeIdNgrams
)
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
import
Gargantext.Database.Action.Search
(
searchDocInDatabase
)
import
Gargantext.Database.Admin.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataAnnuaire
,
HyperdataCorpus
(
_hc_lang
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
ToHyperdataDocument
(
toHyperdataDocument
)
)
import
Gargantext.Database.Admin.Types.Node
hiding
(
DEBUG
)
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
(
DbCmd
'
,
DBCmd
,
hasConfig
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
import
Gargantext.Database.Query.Table.Ngrams
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
(
ContextNodeNgrams2Poly
(
..
),
insertContextNodeNgrams2
)
import
Gargantext.Database.Query.Table.Node
(
MkCorpus
,
insertDefaultNodeIfNotExists
,
getOrMkList
,
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.Document.Add
qualified
as
Doc
(
add
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ToNode
(
toNode
)
)
-- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Query.Table.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Query.Tree.Root
(
getOrMkRoot
,
getOrMk_RootWithCorpus
)
import
Gargantext.Database.Schema.Ngrams
(
indexNgrams
,
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Prelude
hiding
(
to
)
import
Gargantext.Prelude.Config
(
GargConfig
(
..
))
import
Gargantext.System.Logging
import
Gargantext.Utils.Jobs
(
JobHandle
,
MonadJobStatus
(
..
)
)
import
Gargantext.System.Logging
(
logLocM
,
LogLevel
(
DEBUG
),
MonadLogger
)
import
Gargantext.Utils.Jobs
.Monad
(
JobHandle
,
MonadJobStatus
(
..
)
)
import
PUBMED.Types
qualified
as
PUBMED
------------------------------------------------------------------------
-- Imports for upgrade function
import
Gargantext.Database.Query.Tree
(
HasTreeError
)
import
Gargantext.Database.Query.Tree
.Error
(
HasTreeError
)
------------------------------------------------------------------------
...
...
@@ -182,11 +183,11 @@ flowDataText u (DataOld ids) tt cid mfslw _ = do
_
<-
Doc
.
add
userCorpusId
(
map
nodeId2ContextId
ids
)
flowCorpusUser
(
_tt_lang
tt
)
u
userCorpusId
listId
corpusType
mfslw
where
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
corpusType
=
Nothing
::
Maybe
HyperdataCorpus
flowDataText
u
(
DataNew
(
mLen
,
txtC
))
tt
cid
mfslw
jobHandle
=
do
$
(
logLocM
)
DEBUG
$
T
.
pack
$
"Found "
<>
show
mLen
<>
" new documents to process"
for_
(
mLen
<&>
fromInteger
)
(`
addMoreSteps
`
jobHandle
)
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
(
fromMaybe
0
mLen
,
(
transPipe
liftBase
txtC
)
)
jobHandle
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
(
fromMaybe
0
mLen
,
transPipe
liftBase
txtC
)
jobHandle
------------------------------------------------------------------------
-- TODO use proxy
...
...
@@ -199,13 +200,13 @@ flowAnnuaire :: ( DbCmd' env err m
,
MonadJobStatus
m
)
=>
User
->
Either
CorpusName
[
CorpusId
]
->
(
TermType
Lang
)
->
TermType
Lang
->
FilePath
->
JobHandle
m
->
m
AnnuaireId
flowAnnuaire
u
n
l
filePath
jobHandle
=
do
-- TODO Conduit for file
docs
<-
liftBase
$
(
(
readFile_Annuaire
filePath
)
::
IO
[
HyperdataContact
])
docs
<-
liftBase
$
(
readFile_Annuaire
filePath
::
IO
[
HyperdataContact
])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
(
fromIntegral
$
length
docs
,
yieldMany
docs
)
jobHandle
------------------------------------------------------------------------
...
...
@@ -362,10 +363,11 @@ flowCorpusUser l user userCorpusId listId ctype mfslw = do
_
<-
reIndexWith
userCorpusId
listId
NgramsTerms
(
Set
.
singleton
MapTerm
)
_
<-
updateContextScore
userCorpusId
listId
_
<-
updateNgramsOccurrences
userCorpusId
listId
pure
userCorpusId
-- | This function is responsible for contructing terms.
buildSocialList
::
(
HasNodeError
err
,
HasValidationError
err
,
HasNLPServer
env
...
...
@@ -389,8 +391,12 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
nlpServer
<-
view
(
nlpServerGet
l
)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
ngs
<-
buildNgramsLists
user
userCorpusId
masterCorpusId
mfslw
$
GroupWithPosTag
l
nlpServer
HashMap
.
empty
-- let gp = GroupParams { unGroupParams_lang = l
-- , unGroupParams_len = 10
-- , unGroupParams_limit = 10
-- , unGroupParams_stopSize = StopSize 10 }
let
gp
=
GroupWithPosTag
l
nlpServer
HashMap
.
empty
ngs
<-
buildNgramsLists
user
userCorpusId
masterCorpusId
mfslw
gp
-- printDebug "flowCorpusUser:ngs" ngs
...
...
@@ -425,7 +431,7 @@ insertMasterDocs ncs c lang hs = do
(
extractNgramsT
ncs
$
withLang
lang
documentsWithId
)
(
map
(
B
.
first
contextId2NodeId
)
documentsWithId
)
lId
<-
getOrMkList
masterCorpusId
masterUserId
lId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
...
...
@@ -445,13 +451,13 @@ saveDocNgramsWith lId mapNgramsDocs' = do
-- new
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
map
(
bimap
_ngramsTerms
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let
ngrams2insert
=
catMaybes
[
ContextNodeNgrams2
<$>
Just
(
nodeId2ContextId
nId
)
<
*>
(
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
)
let
ngrams2insert
=
catMaybes
[
ContextNodeNgrams2
(
nodeId2ContextId
nId
)
<
$>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
HashMap
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
...
...
@@ -498,5 +504,5 @@ reIndexWith cId lId nt lts = do
$
map
(
docNgrams
corpusLang
nt
ts
)
docs
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
mapM_
(
saveDocNgramsWith
lId
)
ngramsByDoc
pure
()
src/Gargantext/Database/Action/Flow/Extract.hs
View file @
72f5f35c
...
...
@@ -20,15 +20,17 @@ module Gargantext.Database.Action.Flow.Extract
import
Control.Lens
((
^.
),
_Just
,
view
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Map.Strict
qualified
as
DM
import
Gargantext.Core
(
Lang
,
NLPServerConfig
,
PosTagAlgo
(
CoreNLP
))
import
Gargantext.Core
(
Lang
,
NLPServerConfig
(
server
))
import
Gargantext.Core.Text
(
HasText
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
splitOn
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgramsT
,
ExtractedNgrams
(
..
),
TermType
,
cleanExtractedNgrams
,
enrichedTerms
,
extractNgramsT
,
extractTerms
,
tt_lang
)
import
Gargantext.Core.Types
(
POS
(
NP
),
TermsCount
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataContact
,
HyperdataDocument
,
cw_lastName
,
hc_who
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
,
cw_lastName
,
hc_who
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_authors
,
hd_bdd
,
hd_institutes
,
hd_source
)
import
Gargantext.Database.Admin.Types.Node
(
Node
)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Ngrams
(
text2ngrams
)
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Prelude
...
...
@@ -49,6 +51,9 @@ instance ExtractNgramsT HyperdataContact
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
(
DM
.
singleton
Authors
1
,
1
))
|
a'
<-
authors
]
-- | Main ngrams extraction functionality.
-- For NgramsTerms, this calls NLP server under the hood.
-- For Sources, Institutes, Authors, this uses simple split on " ".
instance
ExtractNgramsT
HyperdataDocument
where
extractNgramsT
::
NLPServerConfig
...
...
@@ -72,9 +77,8 @@ instance ExtractNgramsT HyperdataDocument
$
maybe
[
"Nothing"
]
(
splitOn
Authors
(
doc
^.
hd_bdd
))
$
doc
^.
hd_authors
termsWithCounts'
<-
map
(
\
(
t
,
cnt
)
->
(
enrichedTerms
(
lang
^.
tt_lang
)
CoreNLP
NP
t
,
cnt
))
<$>
concat
<$>
liftBase
(
extractTerms
ncs
lang
$
hasText
doc
)
termsWithCounts'
<-
map
(
first
(
enrichedTerms
(
lang
^.
tt_lang
)
(
server
ncs
)
NP
))
.
concat
<$>
liftBase
(
extractTerms
ncs
lang
$
hasText
doc
)
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
(
DM
.
singleton
Sources
1
,
1
))
]
...
...
src/Gargantext/Database/Action/Flow/List.hs
View file @
72f5f35c
...
...
@@ -11,8 +11,6 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE InstanceSigs #-}
module
Gargantext.Database.Action.Flow.List
where
...
...
@@ -27,12 +25,11 @@ import Gargantext.API.Ngrams (saveNodeStory)
import
Gargantext.API.Ngrams.Tools
(
getNodeStory
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.NodeStory
(
HasNodeStory
,
a_history
,
a_state
,
a_version
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
(
HasValidationError
(
..
),
assertValid
)
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
))
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Ngrams
qualified
as
TableNgrams
import
Gargantext.Database.Query.Table.NodeNgrams
(
NodeNgramsPoly
(
..
),
NodeNgramsW
,
listInsertDb
,
{- getCgramsId -}
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
hiding
(
toList
)
-- FLOW LIST
...
...
@@ -169,7 +166,7 @@ listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-- the repo, they will be ignored.
putListNgrams
::
(
HasValidationError
err
,
HasNodeStory
env
err
m
)
=>
NodeId
->
TableNgrams
.
NgramsType
->
NgramsType
->
[
NgramsElement
]
->
m
()
putListNgrams
_
_
[]
=
pure
()
...
...
@@ -179,7 +176,7 @@ putListNgrams nodeId ngramsType nes = putListNgrams' nodeId ngramsType m
putListNgrams'
::
(
HasValidationError
err
,
HasNodeStory
env
err
m
)
=>
NodeId
->
TableNgrams
.
NgramsType
->
NgramsType
->
Map
NgramsTerm
NgramsRepoElement
->
m
()
putListNgrams'
listId
ngramsType'
ns
=
do
...
...
src/Gargantext/Database/Action/Flow/Pairing.hs
View file @
72f5f35c
...
...
@@ -11,7 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Arrows #-}
module
Gargantext.Database.Action.Flow.Pairing
...
...
@@ -25,28 +24,27 @@ import Data.HashMap.Strict qualified as HashMap
import
Data.List
qualified
as
List
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
Text
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
getRepo
,
groupNodesByNgrams
,
mapTermListRoot
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.NodeStory
.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Metrics.CharByChar
(
levenshtein
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
(
TableResult
(
..
))
import
Gargantext.Core.Types.Main
import
Gargantext.Database
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
,
MapTerm
)
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsOnlyUser
)
import
Gargantext.Database.Admin.Config
import
Gargantext.Database.Admin.Types.Hyperdata
-- (HyperdataContact(..))
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata
.Contact
(
HyperdataContact
,
cw_firstName
,
cw_lastName
,
hc_who
)
-- (HyperdataContact(..))
import
Gargantext.Database.Admin.Types.Node
-- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
import
Gargantext.Database.Query.Prelude
(
returnA
,
queryNodeNodeTable
)
import
Gargantext.Database.Prelude
(
Cmd
,
DBCmd
,
runOpaQuery
)
import
Gargantext.Database.Query.Prelude
(
returnA
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.Node.Children
(
getAllContacts
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Table.Node.Select
(
selectNodesWithUsername
)
import
Gargantext.Database.Query.Table.NodeContext_NodeContext
(
insertNodeContext_NodeContext
)
import
Gargantext.Database.Query.Table.NodeNode
(
insertNodeNode
)
import
Gargantext.Database.Prelude
(
Cmd
,
runOpaQuery
)
import
Gargantext.Database.Schema.Ngrams
-- (NgramsType(..))
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Query.Table.NodeNode
import
Gargantext.Database.Schema.Node
(
node_hyperdata
,
node_id
,
node_typename
,
queryNodeTable
)
import
Gargantext.Prelude
hiding
(
sum
)
import
Opaleye
...
...
src/Gargantext/Database/Action/Flow/Types.hs
View file @
72f5f35c
...
...
@@ -23,23 +23,23 @@ import Data.Aeson (ToJSON)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Swagger
(
ToSchema
(
..
),
genericDeclareNamedSchema
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text
import
Gargantext.Core.Text.Corpus.API
qualified
as
API
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Flow.Types
(
UniqId
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text
(
HasText
)
import
Gargantext.API.Admin.Orchestrator.Types
qualified
as
API
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Terms
(
ExtractNgramsT
)
import
Gargantext.Core.Types
(
HasValidationError
,
TermsCount
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Hyperdata
.Document
(
HyperdataDocument
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Prelude
(
CmdM
)
import
Gargantext.Database.Query.Table.Node.Document.Insert
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
UniqParameters
,
InsertDb
,
ToNode
,
AddUniqId
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
)
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Types
(
Indexed
)
import
Gargantext.Prelude
import
Gargantext.System.Logging
import
Gargantext.System.Logging
(
MonadLogger
)
type
FlowCmdM
env
err
m
=
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
72f5f35c
...
...
@@ -11,9 +11,6 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE InstanceSigs #-}
module
Gargantext.Database.Action.Flow.Utils
(
docNgrams
,
documentIdWithNgrams
...
...
@@ -31,12 +28,13 @@ import Data.Text qualified as T
import
Gargantext.API.Ngrams.Types
qualified
as
NT
import
Gargantext.Core
(
Lang
,
toDBid
)
import
Gargantext.Core.Flow.Types
(
UniqId
,
uniqId
)
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Text.Terms.WithList
(
MatchedText
,
buildPatternsWith
,
termsInText
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Utils
(
addTuples
)
import
Gargantext.Data.HashMap.Strict.Utils
qualified
as
HashMap
import
Gargantext.Database.Action.Flow.Types
(
DocumentIdWithNgrams
(
..
),
FlowInsertDB
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
,
hd_abstract
,
hd_title
)
import
Gargantext.Database.Admin.Types.Hyperdata
.Document
(
HyperdataDocument
,
hd_abstract
,
hd_title
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
DBCmd
,
DbCmd
'
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams
...
...
@@ -44,8 +42,8 @@ import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import
Gargantext.Database.Query.Table.Node.Document.Insert
(
ReturnId
,
addUniqId
,
insertDb
,
reId
,
reInserted
,
reUniqId
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
(
..
))
import
Gargantext.Database.Schema.Context
(
context_hyperdata
,
context_id
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Types
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTypeId
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
),
index
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
72f5f35c
...
...
@@ -27,11 +27,12 @@ import Database.PostgreSQL.Simple.ToField qualified as DPS
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Database.PostgreSQL.Simple.Types
qualified
as
DPST
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
(
unionsWith
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
)
-- toDBid instance
import
Gargantext.Prelude
-- | fst is size of Supra Corpus
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
View file @
72f5f35c
...
...
@@ -9,8 +9,6 @@ Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Action.Metrics.TFICF
where
...
...
@@ -20,11 +18,11 @@ import Data.Set qualified as Set
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsUser
,
{-getOccByNgramsOnlyFast,-}
getOccByNgramsOnlyFast_withSample
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
DBCmd
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectCountDocs
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
{-
...
...
src/Gargantext/Database/Action/Search.hs
View file @
72f5f35c
...
...
@@ -23,7 +23,7 @@ module Gargantext.Database.Action.Search (
import
Control.Arrow
(
returnA
)
import
Control.Lens
((
^.
),
view
)
import
Data.BoolExpr
import
Data.BoolExpr
(
BoolExpr
(
..
),
Signed
(
Negative
,
Positive
)
)
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Data.Profunctor.Product
(
p4
)
...
...
@@ -31,25 +31,26 @@ import Data.Set qualified as Set
import
Data.Text
(
unpack
)
import
Data.Text
qualified
as
T
import
Data.Time
(
UTCTime
)
import
Gargantext.Core
import
Gargantext.Core
(
Lang
(
EN
),
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
API
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
IsTrash
,
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
),
HyperdataContact
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
(
..
)
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
)
)
import
Gargantext.Database.Prelude
(
runOpaQuery
,
runCountOpaQuery
,
DBCmd
)
import
Gargantext.Database.Query.Facet
import
Gargantext.Database.Query.Filter
import
Gargantext.Database.Query.Table.Context
import
Gargantext.Database.Query.Filter
(
limit'
,
offset'
)
import
Gargantext.Database.Query.Table.Context
(
queryContextSearchTable
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams
(
queryContextNodeNgramsTable
)
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.Node
(
queryNodeSearchTable
,
defaultList
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Database.Query.Table.NodeContext
import
Gargantext.Database.
Query.Table.NodeContext_NodeContext
import
Gargantext.Database.
Schema.NodeContext_NodeContext
(
NodeContext_NodeContextRead
,
queryNodeContext_NodeContextTable
,
ncnc_nodecontext2
,
ncnc_nodecontext1
)
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.ContextNodeNgrams
(
ContextNodeNgramsPoly
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Node
(
NodePolySearch
(
_ns_hyperdata
,
_ns_search
,
_ns_typename
,
_ns_id
)
)
import
Gargantext.Prelude
hiding
(
groupBy
)
import
Opaleye
hiding
(
Order
)
import
Opaleye
qualified
as
O
hiding
(
Order
)
...
...
@@ -59,7 +60,7 @@ import Opaleye qualified as O hiding (Order)
--
queryToTsSearch
::
API
.
Query
->
Field
SqlTSQuery
queryToTsSearch
q
=
sqlToTSQuery
$
T
.
unpack
$
(
API
.
interpretQuery
q
transformAST
)
queryToTsSearch
q
=
sqlToTSQuery
$
T
.
unpack
$
API
.
interpretQuery
q
transformAST
where
-- It's important to understand how things work under the hood: When we perform
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
72f5f35c
...
...
@@ -13,7 +13,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Query.Table.Ngrams
(
module
Gargantext
.
Database
.
Schema
.
Ngrams
...
...
@@ -30,6 +29,7 @@ import Data.HashMap.Strict qualified as HashMap
import
Data.List
qualified
as
List
import
Data.Map.Strict
qualified
as
Map
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
NgramsType
)
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
runOpaQuery
,
formatPGSQuery
,
runPGSQuery
,
DBCmd
)
import
Gargantext.Database.Query.Join
(
leftJoin3
)
...
...
src/Gargantext/Database/Query/Table/NgramsPostag.hs
View file @
72f5f35c
...
...
@@ -24,11 +24,12 @@ import Data.HashMap.Strict qualified as HashMap
import
Data.List
qualified
as
List
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Core.Text.Ngrams
(
Ngrams
,
ngramsSize
,
ngramsTerms
)
import
Gargantext.Core.Types
(
POS
)
import
Gargantext.Database.Prelude
(
runPGSQuery
,
runPGSQuery_
,
DBCmd
)
import
Gargantext.Database.Query.Table.Ngrams
import
Gargantext.Database.Query.Table.Ngrams
(
NgramsId
,
insertNgrams
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Types
import
Gargantext.Database.Types
(
Indexed
(
Indexed
)
)
import
Gargantext.Prelude
data
NgramsPostag
=
NgramsPostag
{
_np_lang
::
!
Lang
...
...
@@ -87,7 +88,7 @@ insertNgramsPostag' :: [NgramsPostagInsert] -> DBCmd err [Indexed Text Int]
insertNgramsPostag'
ns
=
runPGSQuery
queryInsertNgramsPostag
(
PGS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
$
snd
fields_name
fields
=
map
(
QualifiedIdentifier
Nothing
)
$
snd
fields_name
fields_name
::
(
[
Text
],
[
Text
])
fields_name
=
(
[
"lang_id"
,
"algo_id"
,
"postag"
,
"form"
,
"form_n"
,
"lem"
,
"lem_n"
]
...
...
@@ -155,7 +156,7 @@ SELECT terms,id FROM ins_form_ret
selectLems
::
Lang
->
NLPServerConfig
->
[
Ngrams
]
->
DBCmd
err
[(
Form
,
Lem
)]
selectLems
l
(
NLPServerConfig
{
server
})
ns
=
runPGSQuery
querySelectLems
(
PGS
.
Only
$
Values
fields
datas
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"int4"
,
"text"
,
"int4"
]
fields
=
map
(
QualifiedIdentifier
Nothing
)
[
"int4"
,
"int4"
,
"text"
,
"int4"
]
datas
=
map
(
\
d
->
[
toField
$
toDBid
l
,
toField
$
toDBid
server
]
<>
toRow
d
)
ns
----------------------
...
...
src/Gargantext/Database/Query/Table/NodeContext.hs
View file @
72f5f35c
...
...
@@ -50,13 +50,15 @@ import Data.Time (UTCTime)
import
Database.PostgreSQL.Simple
qualified
as
PGS
(
In
(
..
),
Query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_publication_date
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
Hyperdata
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
,
NodeError
(
..
),
nodeError
)
import
Gargantext.Database.Prelude
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Ngrams
()
-- instances
import
Gargantext.Database.Schema.Node
(
node_id
,
node_typename
,
queryNodeTable
,
NodeRead
)
import
Gargantext.Database.Schema.NodeContext
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
...
...
src/Gargantext/Database/Query/Table/NodeNgrams.hs
View file @
72f5f35c
...
...
@@ -16,7 +16,6 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Query.Table.NodeNgrams
(
getCgramsId
...
...
@@ -32,9 +31,10 @@ import Data.Map.Strict qualified as Map
import
Data.Maybe
(
fromJust
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
(
Query
,
Only
(
..
))
import
Gargantext.Core
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
fromNgramsTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
fromNgramsTypeId
)
import
Gargantext.Database.Schema.NodeNgrams
import
Gargantext.Database.Schema.Prelude
(
Select
,
FromRow
,
sql
,
fromRow
,
toField
,
field
,
Values
(
..
),
QualifiedIdentifier
(
..
),
selectTable
)
import
Gargantext.Prelude
...
...
src/Gargantext/Database/Query/Table/NodeNode.hs
View file @
72f5f35c
...
...
@@ -39,10 +39,12 @@ import Data.Text (splitOn)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
,
hd_publication_date
)
import
Gargantext.Database.Admin.Types.Hyperdata.Prelude
(
Hyperdata
)
import
Gargantext.Database.Prelude
(
DBCmd
,
mkCmd
,
runPGSQuery
,
runCountOpaQuery
,
runOpaQuery
)
import
Gargantext.Database.Schema.Ngrams
()
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.NodeNode
import
Gargantext.Prelude
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
72f5f35c
...
...
@@ -20,26 +20,20 @@ Ngrams connection to the Database.
module
Gargantext.Database.Schema.Ngrams
where
import
Codec.Serialise
(
Serialise
())
import
Control.Lens
(
over
)
import
Data.Aeson
import
Data.Aeson.Types
(
toJSONKeyText
)
import
Data.Bimap
(
Bimap
)
import
Data.Bimap
qualified
as
Bimap
import
Data.ByteString.Char8
qualified
as
B
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
qualified
as
HashMap
import
Data.Text
(
splitOn
,
pack
,
strip
)
import
Data.Text
(
splitOn
,
strip
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Database.PostgreSQL.Simple.FromField
(
returnError
,
ResultError
(
..
))
import
Gargantext.Core
(
HasDBid
(
..
))
import
Gargantext.Core.Types
(
TODO
(
..
),
Typed
(
..
))
import
Gargantext.Core.Text.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
),
NgramsT
)
import
Gargantext.Core.Types
(
Typed
(
..
))
import
Gargantext.Database.Schema.Prelude
hiding
(
over
)
import
Gargantext.Database.Types
import
Gargantext.Database.Types
(
Indexed
(
Indexed
)
)
import
Gargantext.Prelude
import
Servant
(
FromHttpApiData
(
..
),
ToHttpApiData
(
..
))
import
Test.QuickCheck
(
elements
)
import
Text.Read
(
read
)
type
NgramsId
=
Int
...
...
@@ -71,46 +65,6 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
}
)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data
NgramsType
=
Authors
|
Institutes
|
Sources
|
NgramsTerms
deriving
(
Eq
,
Show
,
Read
,
Ord
,
Enum
,
Bounded
,
Generic
)
instance
Serialise
NgramsType
instance
FromJSON
NgramsType
where
parseJSON
(
String
"Authors"
)
=
pure
Authors
parseJSON
(
String
"Institutes"
)
=
pure
Institutes
parseJSON
(
String
"Sources"
)
=
pure
Sources
parseJSON
(
String
"Terms"
)
=
pure
NgramsTerms
parseJSON
(
String
"NgramsTerms"
)
=
pure
NgramsTerms
parseJSON
_
=
mzero
instance
FromJSONKey
NgramsType
where
fromJSONKey
=
FromJSONKeyTextParser
(
parseJSON
.
String
)
instance
ToJSON
NgramsType
where
toJSON
Authors
=
String
"Authors"
toJSON
Institutes
=
String
"Institutes"
toJSON
Sources
=
String
"Sources"
toJSON
NgramsTerms
=
String
"Terms"
instance
ToJSONKey
NgramsType
where
toJSONKey
=
toJSONKeyText
(
pack
.
show
)
instance
FromHttpApiData
NgramsType
where
parseUrlPiece
n
=
pure
$
(
read
.
cs
)
n
instance
ToHttpApiData
NgramsType
where
toUrlPiece
=
pack
.
show
instance
ToParamSchema
NgramsType
where
toParamSchema
_
=
toParamSchema
(
Proxy
::
Proxy
TODO
)
instance
Arbitrary
NgramsType
where
arbitrary
=
elements
[
minBound
..
maxBound
]
-- map NgramsType to its assigned id
instance
FromField
NgramsType
where
fromField
fld
mdata
=
...
...
@@ -127,14 +81,19 @@ instance FromField NgramsType where
instance
ToField
NgramsType
where
toField
nt
=
toField
$
toDBid
nt
instance
FromField
Ngrams
where
fromField
fld
mdata
=
do
x
<-
fromField
fld
mdata
pure
$
text2ngrams
x
ngramsTypes
::
[
NgramsType
]
ngramsTypes
=
[
minBound
..
]
instance
PGS
.
ToRow
Text
where
toRow
t
=
[
toField
t
]
text2ngrams
::
Text
->
Ngrams
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
where
txt'
=
strip
txt
instance
ToSchema
NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
newtype
NgramsTypeId
=
NgramsTypeId
Int
deriving
(
Eq
,
Show
,
Ord
,
Num
)
...
...
@@ -179,46 +138,6 @@ instance HasDBid NgramsType where
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
data
Ngrams
=
UnsafeNgrams
{
_ngramsTerms
::
Text
,
_ngramsSize
::
Int
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
instance
Hashable
Ngrams
makeLenses
''
N
grams
instance
PGS
.
ToRow
Ngrams
where
toRow
(
UnsafeNgrams
t
s
)
=
[
toField
t
,
toField
s
]
instance
FromField
Ngrams
where
fromField
fld
mdata
=
do
x
<-
fromField
fld
mdata
pure
$
text2ngrams
x
instance
PGS
.
ToRow
Text
where
toRow
t
=
[
toField
t
]
text2ngrams
::
Text
->
Ngrams
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
where
txt'
=
strip
txt
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams
data
NgramsT
a
=
NgramsT
{
_ngramsType
::
NgramsType
,
_ngramsT
::
a
}
deriving
(
Generic
,
Show
,
Eq
,
Ord
)
makeLenses
''
N
gramsT
instance
Functor
NgramsT
where
fmap
=
over
ngramsT
-----------------------------------------------------------------------
withMap
::
HashMap
Text
NgramsId
->
Text
->
NgramsId
withMap
m
n
=
maybe
(
panicTrace
$
"[G.D.S.Ngrams.withMap] Should not happen"
<>
(
show
n
))
...
...
src/Gargantext/Database/Schema/NgramsPostag.hs
View file @
72f5f35c
...
...
@@ -20,9 +20,9 @@ ngrams in NgramsTerm Lists.
module
Gargantext.Database.Schema.NgramsPostag
where
import
Control.Lens
import
Control.Lens
(
makeLenses
)
import
Database.PostgreSQL.Simple
qualified
as
PGS
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Prelude
(
Column
,
SqlInt4
,
SqlText
,
ToField
(
toField
),
toRow
)
import
Gargantext.Prelude
...
...
src/Gargantext/Database/Schema/NodeNgrams.hs
View file @
72f5f35c
...
...
@@ -20,8 +20,9 @@ NodeNgrams register Context of Ngrams (named Cgrams then)
module
Gargantext.Database.Schema.NodeNgrams
where
import
Gargantext.Core.Types
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
)
import
Gargantext.Core.Types.Main
(
ListType
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Prelude
...
...
src/Gargantext/Utils/SpacyNLP.hs
View file @
72f5f35c
...
...
@@ -13,7 +13,6 @@ Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Utils.SpacyNLP
(
module
Gargantext
.
Utils
.
SpacyNLP
.
Types
...
...
@@ -24,9 +23,8 @@ module Gargantext.Utils.SpacyNLP (
)
where
import
Data.Aeson
(
encode
)
import
Data.Text
hiding
(
map
,
group
,
filter
,
concat
,
zip
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
(
PosSentences
(
PosSentences
),
Sentence
(
Sentence
),
Token
(
Token
)
)
import
Gargantext.Prelude
import
Network.HTTP.Simple
(
parseRequest
,
httpJSON
,
setRequestBodyLBS
,
getResponseBody
,
Response
)
import
Network.URI
(
URI
(
..
))
...
...
@@ -42,22 +40,22 @@ spacyRequest uri txt = do
----------------------------------------------------------------
spacyTagsToToken
::
SpacyTags
->
Token
spacyTagsToToken
st
=
Token
(
_spacyTags_index
st
)
(
_spacyTags_normalized
st
)
(
_spacyTags_text
st
)
(
_spacyTags_lemma
st
)
(
_spacyTags_head_index
st
)
(
_spacyTags_char_offset
st
)
(
Just
$
_spacyTags_pos
st
)
(
Just
$
_spacyTags_ent_type
st
)
(
Just
$
_spacyTags_prefix
st
)
(
Just
$
_spacyTags_suffix
st
)
spacyTagsToToken
st
=
Token
(
_spacyTags_index
st
)
(
_spacyTags_normalized
st
)
(
_spacyTags_text
st
)
(
_spacyTags_lemma
st
)
(
_spacyTags_head_index
st
)
(
_spacyTags_char_offset
st
)
(
Just
$
_spacyTags_pos
st
)
(
Just
$
_spacyTags_ent_type
st
)
(
Just
$
_spacyTags_prefix
st
)
(
Just
$
_spacyTags_suffix
st
)
spacyDataToPosSentences
::
SpacyData
->
PosSentences
spacyDataToPosSentences
(
SpacyData
ds
)
=
PosSentences
$
map
(
\
(
i
,
ts
)
->
Sentence
i
ts
)
$
zip
[
1
..
]
$
map
(
\
(
SpacyText
_
tags
)
->
map
spacyTagsToToken
tags
)
ds
$
zipWith
Sentence
[
1
..
]
(
map
(
\
(
SpacyText
_
tags
)
->
map
spacyTagsToToken
tags
)
ds
)
-----------------------------------------------------------------
...
...
test/Test/API/UpdateList.hs
View file @
72f5f35c
...
...
@@ -13,7 +13,7 @@ module Test.API.UpdateList (
,
pollUntilFinished
)
where
import
Control.Lens
((
^.
),
mapped
,
over
,
view
)
import
Control.Lens
((
^.
),
mapped
,
over
)
import
Control.Monad.Fail
(
fail
)
import
Data.Aeson.QQ
import
Data.Map.Strict
qualified
as
Map
...
...
@@ -27,14 +27,13 @@ import Gargantext.API.Admin.Auth.Types (Token)
import
Gargantext.API.Ngrams
qualified
as
APINgrams
import
Gargantext.API.Ngrams.List
(
ngramsListFromCSVData
)
import
Gargantext.API.Ngrams.Types
(
MSet
(
..
),
NgramsPatch
(
..
),
NgramsRepoElement
(
..
),
NgramsTablePatch
(
..
),
NgramsTerm
(
..
),
Versioned
(
..
),
mSetToList
,
toNgramsPatch
,
ne_children
,
ne_ngrams
,
vc_data
,
_NgramsTable
)
import
Gargantext.Core.
NodeStory
(
hasNodeStory
,
nse_getter
,
HasNodeArchiveStoryImmediateSaver
(
..
))
import
Gargantext.Core.
Text.Ngrams
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
ListType
(
..
),
NodeId
,
_NodeId
)
import
Gargantext.Core.Types.Individu
import
Gargantext.Database.Action.User
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Tree.Root
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Prelude
hiding
(
get
)
import
Network.Wai.Handler.Warp
qualified
as
Wai
import
Paths_gargantext
(
getDataFileName
)
...
...
test/Test/Database/Operations/NodeStory.hs
View file @
72f5f35c
...
...
@@ -24,6 +24,7 @@ import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, sav
import
Gargantext.API.Ngrams.Types
(
MSet
(
..
),
NgramsPatch
(
..
),
NgramsRepoElement
(
..
),
NgramsTerm
(
..
),
Versioned
(
..
),
mkNgramsTablePatch
,
nre_children
,
nre_list
,
nre_parent
,
nre_root
)
import
Gargantext.API.Ngrams.Tools
(
getNodeStory
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types.Individu
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
UserId
)
import
Gargantext.Database.Action.User
(
getUserId
)
...
...
@@ -32,7 +33,6 @@ import Gargantext.Database.Prelude (runPGSQuery)
import
Gargantext.Database.Query.Table.Ngrams
(
selectNgramsId
)
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Tree.Root
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
(
NodePoly
(
..
))
import
Gargantext.Prelude
import
Test.Database.Types
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment