Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
3b163685
Commit
3b163685
authored
Oct 27, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] first version of ngrams counting in docs
parent
0d6a5eaa
Changes
34
Hide whitespace changes
Inline
Side-by-side
Showing
34 changed files
with
230 additions
and
151 deletions
+230
-151
README.md
README.md
+2
-0
Main.hs
bin/gargantext-upgrade/Main.hs
+1
-2
docker-compose.yaml
devops/docker/docker-compose.yaml
+1
-1
schema.sql
devops/postgres/schema.sql
+1
-0
0.0.6.7.3.sql
devops/postgres/upgrade/0.0.6.7.3.sql
+2
-0
gargantext.cabal
gargantext.cabal
+5
-4
package.yaml
package.yaml
+2
-0
Text.hs
src-test/Core/Text.hs
+0
-1
Utils.hs
src-test/Core/Utils.hs
+28
-0
Clustering.hs
src-test/Graph/Clustering.hs
+0
-2
Main.hs
src-test/Main.hs
+4
-1
Crypto.hs
src-test/Utils/Crypto.hs
+1
-3
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+4
-4
List.hs
src/Gargantext/API/Ngrams/List.hs
+8
-8
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+2
-2
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+5
-5
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+39
-34
Eleve.hs
src/Gargantext/Core/Text/Terms/Eleve.hs
+3
-0
Mono.hs
src/Gargantext/Core/Text/Terms/Mono.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+11
-8
Group.hs
src/Gargantext/Core/Text/Terms/Multi/Group.hs
+0
-1
En.hs
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
+0
-1
PosTagging.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+11
-7
Types.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+0
-1
WithList.hs
src/Gargantext/Core/Text/Terms/WithList.hs
+7
-5
Types.hs
src/Gargantext/Core/Types.hs
+6
-5
Utils.hs
src/Gargantext/Core/Utils.hs
+19
-2
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+1
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+40
-32
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+10
-9
ContextNodeNgrams.hs
src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
+2
-1
UpdateOpaleye.hs
src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
+3
-4
ContextNodeNgrams.hs
src/Gargantext/Database/Schema/ContextNodeNgrams.hs
+8
-2
NodeNgrams.hs
src/Gargantext/Database/Schema/NodeNgrams.hs
+2
-2
No files found.
README.md
View file @
3b163685
...
@@ -196,6 +196,8 @@ To build documentation, run:
...
@@ -196,6 +196,8 @@ To build documentation, run:
stack
--docker
build
--haddock
--no-haddock-deps
--fast
stack
--docker
build
--haddock
--no-haddock-deps
--fast
```
```
(in
`.stack-work/dist/x86_64-linux-nix/Cabal-3.2.1.0/doc/html/gargantext`
).
## GraphQL
## GraphQL
Some introspection information.
Some introspection information.
...
...
bin/gargantext-upgrade/Main.hs
View file @
3b163685
...
@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
...
@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
NodeDocument
,
NodeContact
))
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
NodeDocument
,
NodeContact
))
import
Gargantext.Database.Prelude
(
Cmd
''
,
Cmd
,
execPGSQuery
)
import
Gargantext.Database.Prelude
(
Cmd
''
,
Cmd
,
execPGSQuery
)
import
Gargantext.Database.Query.Table.Node
(
getOrMkList
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
import
Prelude
(
getLine
)
import
Prelude
(
getLine
)
...
@@ -67,7 +66,7 @@ main = do
...
@@ -67,7 +66,7 @@ main = do
_ok
<-
getLine
_ok
<-
getLine
cfg
<-
readConfig
iniPath
cfg
<-
readConfig
iniPath
let
secret
=
_gc_secretkey
cfg
let
_
secret
=
_gc_secretkey
cfg
withDevEnv
iniPath
$
\
env
->
do
withDevEnv
iniPath
$
\
env
->
do
-- First upgrade the Database Schema
-- First upgrade the Database Schema
...
...
devops/docker/docker-compose.yaml
View file @
3b163685
...
@@ -24,7 +24,7 @@ services:
...
@@ -24,7 +24,7 @@ services:
network_mode
:
host
network_mode
:
host
#command: ["postgres", "-c", "log_statement=all"]
#command: ["postgres", "-c", "log_statement=all"]
#ports:
#ports:
#
- 5432:5432
#
- 5432:5432
environment
:
environment
:
POSTGRES_USER
:
gargantua
POSTGRES_USER
:
gargantua
POSTGRES_PASSWORD
:
C8kdcUrAQy66U
POSTGRES_PASSWORD
:
C8kdcUrAQy66U
...
...
devops/postgres/schema.sql
View file @
3b163685
...
@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
...
@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
ngrams_id
INTEGER
NOT
NULL
REFERENCES
public
.
ngrams
(
id
)
ON
DELETE
CASCADE
,
ngrams_id
INTEGER
NOT
NULL
REFERENCES
public
.
ngrams
(
id
)
ON
DELETE
CASCADE
,
ngrams_type
INTEGER
,
ngrams_type
INTEGER
,
weight
double
precision
,
weight
double
precision
,
doc_count
INTEGER
,
PRIMARY
KEY
(
context_id
,
node_id
,
ngrams_id
,
ngrams_type
)
PRIMARY
KEY
(
context_id
,
node_id
,
ngrams_id
,
ngrams_type
)
);
);
ALTER
TABLE
public
.
context_node_ngrams
OWNER
TO
gargantua
;
ALTER
TABLE
public
.
context_node_ngrams
OWNER
TO
gargantua
;
...
...
devops/postgres/upgrade/0.0.6.7.3.sql
0 → 100644
View file @
3b163685
ALTER
TABLE
context_node_ngrams
ADD
COLUMN
doc_count
INTEGER
;
gargantext.cabal
View file @
3b163685
cabal-version: 1.12
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.3
5.0
.
-- This file has been generated from package.yaml by hpack version 0.3
4.7
.
--
--
-- see: https://github.com/sol/hpack
-- see: https://github.com/sol/hpack
name: gargantext
name: gargantext
version: 0.0.6.8.2
version:
0.0.6.8.2
synopsis: Search, map, share
synopsis: Search, map, share
description: Please see README.md
description: Please see README.md
category: Data
category: Data
...
@@ -50,6 +50,7 @@ library
...
@@ -50,6 +50,7 @@ library
Gargantext.Core.Types
Gargantext.Core.Types
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main
Gargantext.Core.Types.Main
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix
Gargantext.Core.Utils.Prefix
Gargantext.Utils.Jobs
Gargantext.Utils.Jobs
Gargantext.Utils.Jobs.API
Gargantext.Utils.Jobs.API
...
@@ -87,6 +88,7 @@ library
...
@@ -87,6 +88,7 @@ library
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search
Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms
Gargantext.Core.Text.Terms
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr
Gargantext.Core.Text.Terms.Multi.Lang.Fr
...
@@ -211,7 +213,6 @@ library
...
@@ -211,7 +213,6 @@ library
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.SP
Gargantext.Core.Text.Samples.SP
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Token
Gargantext.Core.Text.Terms.Mono.Token
...
@@ -221,7 +222,6 @@ library
...
@@ -221,7 +222,6 @@ library
Gargantext.Core.Text.Terms.Multi.PosTagging
Gargantext.Core.Text.Terms.Multi.PosTagging
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Types.Phylo
Gargantext.Core.Types.Phylo
Gargantext.Core.Utils
Gargantext.Core.Utils.DateUtils
Gargantext.Core.Utils.DateUtils
Gargantext.Core.Viz
Gargantext.Core.Viz
Gargantext.Core.Viz.Chart
Gargantext.Core.Viz.Chart
...
@@ -809,6 +809,7 @@ test-suite garg-test
...
@@ -809,6 +809,7 @@ test-suite garg-test
Core.Text
Core.Text
Core.Text.Examples
Core.Text.Examples
Core.Text.Flow
Core.Text.Flow
Core.Utils
Graph.Clustering
Graph.Clustering
Graph.Distance
Graph.Distance
Ngrams.Lang
Ngrams.Lang
...
...
package.yaml
View file @
3b163685
...
@@ -74,6 +74,7 @@ library:
...
@@ -74,6 +74,7 @@ library:
-
Gargantext.Core.Types
-
Gargantext.Core.Types
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Utils
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Utils.Jobs
-
Gargantext.Utils.Jobs
-
Gargantext.Utils.Jobs.API
-
Gargantext.Utils.Jobs.API
...
@@ -111,6 +112,7 @@ library:
...
@@ -111,6 +112,7 @@ library:
-
Gargantext.Core.Text.Prepare
-
Gargantext.Core.Text.Prepare
-
Gargantext.Core.Text.Search
-
Gargantext.Core.Text.Search
-
Gargantext.Core.Text.Terms
-
Gargantext.Core.Text.Terms
-
Gargantext.Core.Text.Terms.Eleve
-
Gargantext.Core.Text.Terms.Mono
-
Gargantext.Core.Text.Terms.Mono
-
Gargantext.Core.Text.Terms.Multi.Lang.En
-
Gargantext.Core.Text.Terms.Multi.Lang.En
-
Gargantext.Core.Text.Terms.Multi.Lang.Fr
-
Gargantext.Core.Text.Terms.Multi.Lang.Fr
...
...
src-test/Core/Text.hs
View file @
3b163685
{-|
{-|
Module : Graph.Clustering
Module : Graph.Clustering
Description : Basic tests to avoid quick regression
Description : Basic tests to avoid quick regression
...
...
src-test/Core/Utils.hs
0 → 100644
View file @
3b163685
{-|
Module : Core.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Core.Utils
where
import
Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Core.Utils
-- | Core.Utils tests
test
::
IO
()
test
=
hspec
$
do
describe
"check if groupWithCounts works"
$
do
it
"simple integer array"
$
do
(
groupWithCounts
[
1
,
2
,
3
,
1
,
2
,
3
])
`
shouldBe
`
[(
1
,
2
),
(
2
,
2
),
(
3
,
2
)]
it
"string"
$
do
(
groupWithCounts
"abccba"
)
`
shouldBe
`
[(
'a'
,
2
),
(
'b'
,
2
),
(
'c'
,
2
)]
src-test/Graph/Clustering.hs
View file @
3b163685
{-|
{-|
Module : Graph.Clustering
Module : Graph.Clustering
Description : Basic tests to avoid quick regression
Description : Basic tests to avoid quick regression
...
@@ -40,4 +39,3 @@ test = hspec $ do
...
@@ -40,4 +39,3 @@ test = hspec $ do
let
let
result
=
List
.
length
partitions
>
1
result
=
List
.
length
partitions
>
1
shouldBe
True
result
shouldBe
True
result
src-test/Main.hs
View file @
3b163685
...
@@ -11,6 +11,8 @@ Portability : POSIX
...
@@ -11,6 +11,8 @@ Portability : POSIX
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Core.Utils
as
Utils
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang
--import qualified Ngrams.Lang as Lang
import
qualified
Ngrams.Lang.Occurrences
as
Occ
import
qualified
Ngrams.Lang.Occurrences
as
Occ
...
@@ -22,11 +24,12 @@ import qualified Utils.Crypto as Crypto
...
@@ -22,11 +24,12 @@ import qualified Utils.Crypto as Crypto
main
::
IO
()
main
::
IO
()
main
=
do
main
=
do
Utils
.
test
-- Occ.parsersTest
-- Occ.parsersTest
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN
-- Lang.ngramsExtractionTest EN
-- Metrics.main
-- Metrics.main
Graph
.
test
Graph
.
test
PD
.
testFromRFC3339
PD
.
testFromRFC3339
-- GD.test
-- GD.test
Crypto
.
test
Crypto
.
test
src-test/Utils/Crypto.hs
View file @
3b163685
{-|
{-|
Module : Utils.Crypto
Module : Utils.Crypto
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
@@ -43,4 +42,3 @@ test = hspec $ do
...
@@ -43,4 +42,3 @@ test = hspec $ do
let
hash2
=
hash
([
"b"
,
"a"
]
::
[
Text
])
let
hash2
=
hash
([
"b"
,
"a"
]
::
[
Text
])
it
"compare"
$
do
it
"compare"
$
do
hash1
`
shouldBe
`
hash2
hash1
`
shouldBe
`
hash2
src/Gargantext/API/Ngrams.hs
View file @
3b163685
...
@@ -184,9 +184,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
...
@@ -184,9 +184,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
saveNodeStory
=
do
saveNodeStory
=
do
saver
<-
view
hasNodeStorySaver
saver
<-
view
hasNodeStorySaver
liftBase
$
do
liftBase
$
do
Gargantext
.
Prelude
.
putStrLn
"---- Running node story saver ----"
--
Gargantext.Prelude.putStrLn "---- Running node story saver ----"
saver
saver
Gargantext
.
Prelude
.
putStrLn
"---- Node story saver finished ----"
--
Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
saveNodeStoryImmediate
::
(
MonadReader
env
m
,
MonadBase
IO
m
,
HasNodeStoryImmediateSaver
env
)
saveNodeStoryImmediate
::
(
MonadReader
env
m
,
MonadBase
IO
m
,
HasNodeStoryImmediateSaver
env
)
...
@@ -194,9 +194,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
...
@@ -194,9 +194,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
saveNodeStoryImmediate
=
do
saveNodeStoryImmediate
=
do
saver
<-
view
hasNodeStoryImmediateSaver
saver
<-
view
hasNodeStoryImmediateSaver
liftBase
$
do
liftBase
$
do
Gargantext
.
Prelude
.
putStrLn
"---- Running node story immediate saver ----"
--
Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
saver
saver
Gargantext
.
Prelude
.
putStrLn
"---- Node story immediate saver finished ----"
--
Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution
::
ListType
->
ListType
->
ListType
listTypeConflictResolution
::
ListType
->
ListType
->
ListType
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
3b163685
...
@@ -159,15 +159,15 @@ reIndexWith cId lId nt lts = do
...
@@ -159,15 +159,15 @@ reIndexWith cId lId nt lts = do
-- TODO Tests here
-- TODO Tests here
let
let
ngramsByDoc
=
map
(
HashMap
.
fromList
)
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
\
doc
->
List
.
zip
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
$
Text
.
unlines
$
catMaybes
[
doc
^.
context_hyperdata
.
hd_title
[
doc
^.
context_hyperdata
.
hd_title
,
doc
^.
context_hyperdata
.
hd_abstract
,
doc
^.
context_hyperdata
.
hd_abstract
]
]
)
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context_id
)
1
)]])
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context_id
)
1
)]])
)
docs
)
docs
-- printDebug "ngramsByDoc" ngramsByDoc
-- printDebug "ngramsByDoc" ngramsByDoc
...
...
src/Gargantext/API/Ngrams/Tools.hs
View file @
3b163685
...
@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m =
...
@@ -195,7 +195,7 @@ getCoocByNgrams' f (Diagonal diag) m =
listToCombi
identity
ks
listToCombi
identity
ks
]
]
where
where
ks
=
HM
.
keys
m
ks
=
HM
.
keys
m
-- TODO k could be either k1 or k2 here
-- TODO k could be either k1 or k2 here
...
@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
...
@@ -220,7 +220,7 @@ getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
-- TODO check optim
-- TODO check optim
-- listToCombi identity ks1
-- listToCombi identity ks1
]
]
where
where
ks1
=
HM
.
keys
m1
ks1
=
HM
.
keys
m1
ks2
=
HM
.
keys
m2
ks2
=
HM
.
keys
m2
...
...
src/Gargantext/Core/NodeStory.hs
View file @
3b163685
...
@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do
...
@@ -455,7 +455,7 @@ insertArchiveList c nodeId a = do
where
where
query
::
PGS
.
Query
query
::
PGS
.
Query
query
=
[
sql
|
WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?)
query
=
[
sql
|
WITH s as (SELECT ? as sid, ? sversion, ? sngrams_type_id, ngrams.id as sngrams_id, ?::jsonb as srepo FROM ngrams WHERE terms = ?)
INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
INSERT INTO node_stories(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id
SELECT s.sid, s.sversion, s.sngrams_type_id, s.sngrams_id, s.srepo from s s join nodes n on s.sid = n.id
|]
|]
...
@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
...
@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
--printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates
--printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates
-- 2. Perform inserts/deletes/updates
-- 2. Perform inserts/deletes/updates
printDebug
"[updateNodeStory] applying insert"
()
--
printDebug "[updateNodeStory] applying insert" ()
insertArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
insertArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
inserts
}
,
_a_state
=
archiveStateFromList
inserts
}
printDebug
"[updateNodeStory] insert applied"
()
--
printDebug "[updateNodeStory] insert applied" ()
--TODO Use currentArchive ^. a_version in delete and report error
--TODO Use currentArchive ^. a_version in delete and report error
-- if entries with (node_id, ngrams_type_id, ngrams_id) but
-- if entries with (node_id, ngrams_type_id, ngrams_id) but
-- different version are found.
-- different version are found.
deleteArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
deleteArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
deletes
}
,
_a_state
=
archiveStateFromList
deletes
}
printDebug
"[updateNodeStory] delete applied"
()
--
printDebug "[updateNodeStory] delete applied" ()
updateArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
updateArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
updates
}
,
_a_state
=
archiveStateFromList
updates
}
printDebug
"[updateNodeStory] update applied"
()
--
printDebug "[updateNodeStory] update applied" ()
pure
()
pure
()
-- where
-- where
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
3b163685
...
@@ -28,8 +28,9 @@ compute graph
...
@@ -28,8 +28,9 @@ compute graph
-}
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Text.Terms
module
Gargantext.Core.Text.Terms
where
where
...
@@ -47,6 +48,7 @@ import qualified Data.Set as Set
...
@@ -47,6 +48,7 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.Core
import
Gargantext.Core
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Text
(
sentences
,
HasText
(
..
))
import
Gargantext.Core.Text
(
sentences
,
HasText
(
..
))
import
Gargantext.Core.Text.Terms.Eleve
(
mainEleveWith
,
Tries
,
Token
,
buildTries
,
toToken
)
import
Gargantext.Core.Text.Terms.Eleve
(
mainEleveWith
,
Tries
,
Token
,
buildTries
,
toToken
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTerms
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTerms
)
...
@@ -70,24 +72,23 @@ data TermType lang
...
@@ -70,24 +72,23 @@ data TermType lang
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
}
deriving
(
Generic
)
deriving
(
Generic
)
deriving
instance
(
Show
lang
)
=>
Show
(
TermType
lang
)
makeLenses
''
T
ermType
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
--group :: [Text] -> [Text]
--group = undefined
--group = undefined
-- remove Stop Words
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hidd
eng mapM
from end user).
-- | Sugar to extract terms from text (hidd
ing 'mapM'
from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
TermsWithCount
]]
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}))
xs
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}))
xs
where
where
m'
=
case
_tt_model
of
m'
=
case
_tt_model
of
Just
m''
->
m''
Just
m''
->
m''
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
...
@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
...
@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgramsT
h
class
ExtractNgramsT
h
where
where
extractNgramsT
::
HasText
h
extractNgramsT
::
HasText
h
=>
TermType
Lang
=>
TermType
Lang
->
h
->
h
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
------------------------------------------------------------------------
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
l
pa
po
(
Terms
ng1
ng2
)
=
enrichedTerms
l
pa
po
(
Terms
ng1
ng2
)
=
...
@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) =
...
@@ -132,7 +134,7 @@ enrichedTerms l pa po (Terms ng1 ng2) =
------------------------------------------------------------------------
------------------------------------------------------------------------
cleanNgrams
::
Int
->
Ngrams
->
Ngrams
cleanNgrams
::
Int
->
Ngrams
->
Ngrams
cleanNgrams
s
ng
cleanNgrams
s
ng
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
ng
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
ng
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
))
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
))
...
@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do
...
@@ -151,10 +153,10 @@ insertExtractedNgrams ngs = do
let
(
s
,
e
)
=
List
.
partition
isSimpleNgrams
ngs
let
(
s
,
e
)
=
List
.
partition
isSimpleNgrams
ngs
m1
<-
insertNgrams
(
map
unSimpleNgrams
s
)
m1
<-
insertNgrams
(
map
unSimpleNgrams
s
)
--printDebug "others" m1
--printDebug "others" m1
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
--printDebug "terms" m2
--printDebug "terms" m2
let
result
=
HashMap
.
union
m1
m2
let
result
=
HashMap
.
union
m1
m2
pure
result
pure
result
...
@@ -163,43 +165,48 @@ isSimpleNgrams (SimpleNgrams _) = True
...
@@ -163,43 +165,48 @@ isSimpleNgrams (SimpleNgrams _) = True
isSimpleNgrams
_
=
False
isSimpleNgrams
_
=
False
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Terms from
Text
-- | Terms from
'Text'
--
Mono
: mono terms
--
'Mono'
: mono terms
--
Multi
: multi terms
--
'Multi'
: multi terms
--
MonoMulti
: mono and multi
--
'MonoMulti'
: mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms
::
TermType
Lang
->
Text
->
IO
[
Terms
]
terms
::
TermType
Lang
->
Text
->
IO
[
TermsWithCount
]
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
tt
txt
=
do
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
printDebug
"[terms] tt"
tt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
printDebug
"[terms] txt"
txt
terms
(
Unsupervised
{
..
})
txt
=
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
})
txt
out
<-
termsNoLog
tt
txt
printDebug
"[terms] out"
out
pure
out
termsNoLog
::
TermType
Lang
->
Text
->
IO
[
TermsWithCount
]
termsNoLog
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
termsNoLog
(
Multi
lang
)
txt
=
multiterms
lang
txt
termsNoLog
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
termsNoLog
(
Unsupervised
{
..
})
txt
=
pure
$
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
})
txt
where
where
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type
WindowSize
=
Int
type
WindowSize
=
Int
type
MinNgramSize
=
Int
type
MinNgramSize
=
Int
termsUnsupervised
::
TermType
Lang
->
Text
->
IO
[
Terms
]
-- | Unsupervised ngrams extraction
termsUnsupervised
(
Unsupervised
l
n
s
m
)
=
-- language agnostic extraction
pure
-- TODO: newtype BlockText
.
map
(
text2term
l
)
termsUnsupervised
::
TermType
Lang
->
Text
->
[
TermsWithCount
]
.
List
.
nub
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Nothing
})
=
panic
"[termsUnsupervised] no model"
.
(
List
.
filter
(
\
l'
->
List
.
length
l'
>=
s
))
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
_tt_model
,
..
})
=
map
(
\
(
t
,
cnt
)
->
(
text2term
_tt_lang
t
,
cnt
))
.
groupWithCounts
-- . List.nub
.
(
List
.
filter
(
\
l'
->
List
.
length
l'
>=
_tt_windowSize
))
.
List
.
concat
.
List
.
concat
.
mainEleveWith
(
maybe
(
panic
"no model"
)
identity
m
)
n
.
mainEleveWith
_tt_model
_tt_ngramsSize
.
uniText
.
uniText
termsUnsupervised
_
=
undefined
termsUnsupervised
_
=
undefined
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
...
@@ -217,5 +224,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
...
@@ -217,5 +224,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation
::
Text
->
Bool
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;.:"
::
String
)
<$>
(
"!?(),;.:"
::
String
)
src/Gargantext/Core/Text/Terms/Eleve.hs
View file @
3b163685
...
@@ -32,6 +32,7 @@ Notes for current implementation:
...
@@ -32,6 +32,7 @@ Notes for current implementation:
-}
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeFamilies #-}
...
@@ -278,6 +279,8 @@ data Tries k e = Tries
...
@@ -278,6 +279,8 @@ data Tries k e = Tries
makeLenses
''
T
ries
makeLenses
''
T
ries
deriving
instance
(
Show
k
,
Show
e
)
=>
Show
(
Tries
k
e
)
buildTries
::
Int
->
[[
Token
]]
->
Tries
Token
()
buildTries
::
Int
->
[[
Token
]]
->
Tries
Token
()
buildTries
n
sentences
=
Tries
buildTries
n
sentences
=
Tries
{
_fwd
=
buildTrie
Forward
n
sentences
{
_fwd
=
buildTrie
Forward
n
sentences
...
...
src/Gargantext/Core/Text/Terms/Mono.hs
View file @
3b163685
...
@@ -40,8 +40,8 @@ words = monoTexts
...
@@ -40,8 +40,8 @@ words = monoTexts
isSep
::
Char
->
Bool
isSep
::
Char
->
Bool
isSep
=
(`
elem
`
(
",.:;?!(){}[]
\"\'
"
::
String
))
isSep
=
(`
elem
`
(
",.:;?!(){}[]
\"\'
"
::
String
))
monoTerms
::
Lang
->
Text
->
[
Terms
]
monoTerms
::
Lang
->
Text
->
[
Terms
WithCount
]
monoTerms
l
txt
=
map
(
monoText2term
l
)
$
monoTexts
txt
monoTerms
l
txt
=
map
(
\
t
->
(
monoText2term
l
t
,
1
)
)
$
monoTexts
txt
monoTexts
::
Text
->
[
Text
]
monoTexts
::
Text
->
[
Text
]
monoTexts
=
L
.
concat
.
monoTextsBySentence
monoTexts
=
L
.
concat
.
monoTextsBySentence
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
3b163685
...
@@ -21,6 +21,7 @@ import Data.List (concat)
...
@@ -21,6 +21,7 @@ import Data.List (concat)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Text.Terms.Multi.PosTagging
import
Gargantext.Core.Text.Terms.Multi.PosTagging
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
...
@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
...
@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
-------------------------------------------------------------------
-------------------------------------------------------------------
multiterms
::
Lang
->
Text
->
IO
[
Terms
]
multiterms
::
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
=
multiterms'
tokenTag2terms
multiterms
l
txt
=
do
ret
<-
multiterms'
tokenTag2terms
l
txt
pure
$
groupWithCounts
ret
where
where
multiterms'
::
(
TokenTag
->
a
)
->
Lang
->
Text
->
IO
[
a
]
multiterms'
::
(
TokenTag
->
a
)
->
Lang
->
Text
->
IO
[
a
]
multiterms'
f
lang
txt
=
concat
multiterms'
f
lang
txt
'
=
concat
<$>
map
(
map
f
)
<$>
map
(
map
f
)
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
tokenTags
lang
txt
<$>
tokenTags
lang
txt
'
-------------------------------------------------------------------
-------------------------------------------------------------------
tokenTag2terms
::
TokenTag
->
Terms
tokenTag2terms
::
TokenTag
->
Terms
...
@@ -57,10 +60,10 @@ tokenTags l _ = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <
...
@@ -57,10 +60,10 @@ tokenTags l _ = panic $ "[G.C.T.T.Multi] Lang NLP API not implemented yet " <
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
tokenTagsWith
lang
txt
nlp
=
map
(
groupTokens
lang
)
tokenTagsWith
lang
txt
nlp
=
map
(
groupTokens
lang
)
<$>
map
tokens2tokensTags
<$>
map
tokens2tokensTags
<$>
map
_sentenceTokens
<$>
map
_sentenceTokens
<$>
_sentences
<$>
_sentences
<$>
nlp
lang
txt
<$>
nlp
lang
txt
---- | This function analyses and groups (or not) ngrams according to
---- | This function analyses and groups (or not) ngrams according to
...
...
src/Gargantext/Core/Text/Terms/Multi/Group.hs
View file @
3b163685
...
@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
...
@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2
_
_
[
x
@
(
TokenTag
_
_
(
Just
_
)
_
)]
=
[
x
]
group2
_
_
[
x
@
(
TokenTag
_
_
(
Just
_
)
_
)]
=
[
x
]
group2
p1
p2
(
x
@
(
TokenTag
_
_
(
Just
_
)
_
)
:
y
@
(
TokenTag
_
_
Nothing
_
)
:
z
)
=
(
x
:
y
:
group2
p1
p2
(
y
:
z
))
group2
p1
p2
(
x
@
(
TokenTag
_
_
(
Just
_
)
_
)
:
y
@
(
TokenTag
_
_
Nothing
_
)
:
z
)
=
(
x
:
y
:
group2
p1
p2
(
y
:
z
))
group2
_
_
[]
=
[]
group2
_
_
[]
=
[]
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
View file @
3b163685
...
@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
...
@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--
--
--groupNgrams (x:xs) = (x:(groupNgrams xs))
--groupNgrams (x:xs) = (x:(groupNgrams xs))
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
View file @
3b163685
...
@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag]
...
@@ -45,19 +45,22 @@ tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags
ts
=
filter'
$
map
tokenTag
ts
tokens2tokensTags
ts
=
filter'
$
map
tokenTag
ts
------------------------------------------------------------------------
------------------------------------------------------------------------
tokenTag
::
Token
->
TokenTag
tokenTag
::
Token
->
TokenTag
tokenTag
(
Token
_
w
_
l
_
_
p
n
_
_
)
=
TokenTag
w'
l'
p
n
tokenTag
(
Token
{
..
})
=
TokenTag
{
_my_token_word
=
w'
,
_my_token_lemma
=
l'
,
_my_token_pos
=
_tokenPos
,
_my_token_ner
=
_tokenNer
}
where
where
w'
=
split
w
w'
=
split
_tokenWord
l'
=
fromList
(
split
l
)
l'
=
fromList
(
split
_tokenLemma
)
split
=
splitOn
(
pack
" "
)
.
toLower
split
=
splitOn
(
pack
" "
)
.
toLower
filter'
::
[
TokenTag
]
->
[
TokenTag
]
filter'
::
[
TokenTag
]
->
[
TokenTag
]
filter'
xs
=
filter
isNgrams
xs
filter'
xs
=
filter
isNgrams
xs
where
where
isNgrams
(
TokenTag
_
_
p
n
)
=
isJust
p
||
isJust
n
isNgrams
(
TokenTag
{
..
})
=
isJust
_my_token_pos
||
isJust
_my_token_ner
------------------------------------------------------------------------
------------------------------------------------------------------------
-- request =
-- request =
-- "fr" : {
-- "fr" : {
-- "tokenize.language" : "fr",
-- "tokenize.language" : "fr",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
-- "pos.model" : "edu/stanford/nlp/models/pos-tagger/french/french.tagger",
...
@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs
...
@@ -66,9 +69,9 @@ filter' xs = filter isNgrams xs
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.model" : "edu/stanford/nlp/models/parser/nndep/UD_French.gz",
-- "depparse.language" : "french",
-- "depparse.language" : "french",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ner.model": DATA_ROOT+"/eunews.fr.crf.gz",
-- "ssplit.newlineIsSentenceBreak": "always"
-- "ssplit.newlineIsSentenceBreak": "always"
-- },
-- },
--
--
corenlp'
::
(
FromJSON
a
corenlp'
::
(
FromJSON
a
,
ConvertibleStrings
p
ByteString
,
ConvertibleStrings
p
ByteString
...
@@ -80,6 +83,7 @@ corenlp' lang txt = do
...
@@ -80,6 +83,7 @@ corenlp' lang txt = do
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,lemma,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,lemma,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
-- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
httpJSON
request
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
View file @
3b163685
...
@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
...
@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
deriving
(
Show
,
Generic
)
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
osSentences
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
osSentences
)
src/Gargantext/Core/Text/Terms/WithList.hs
View file @
3b163685
{-|
{-|
Module : Gargantext.Core.Text.Terms.WithList
Module : Gargantext.Core.Text.Terms.WithList
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
...
@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Prelude
(
error
)
import
Prelude
(
error
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.IntMap.Strict
as
IntMap
import
qualified
Data.IntMap.Strict
as
IntMap
...
@@ -72,8 +74,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
...
@@ -72,8 +74,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
-- Utils
-- Utils
type
BlockText
=
Text
type
BlockText
=
Text
type
MatchedText
=
Text
type
MatchedText
=
Text
termsInText
::
Patterns
->
BlockText
->
[
MatchedText
]
termsInText
::
Patterns
->
BlockText
->
[
(
MatchedText
,
TermsCount
)
]
termsInText
pats
txt
=
List
.
nub
termsInText
pats
txt
=
groupWithCounts
$
List
.
concat
$
List
.
concat
$
map
(
map
unwords
)
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
$
extractTermsWithList
pats
txt
...
@@ -96,7 +98,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
...
@@ -96,7 +98,7 @@ extractTermsWithList' pats = map (concat . map concat . replaceTerms pats)
{- | Not used
{- | Not used
filterWith :: TermList
filterWith :: TermList
-> (a -> Text)
-> (a -> Text)
-> [a]
-> [a]
-> [(a, [Text])]
-> [(a, [Text])]
filterWith termList f xs = filterWith' termList f zip xs
filterWith termList f xs = filterWith' termList f zip xs
...
@@ -104,7 +106,7 @@ filterWith termList f xs = filterWith' termList f zip xs
...
@@ -104,7 +106,7 @@ filterWith termList f xs = filterWith' termList f zip xs
filterWith' :: TermList
filterWith' :: TermList
-> (a -> Text)
-> (a -> Text)
-> ([a] -> [[Text]] -> [b])
-> ([a] -> [[Text]] -> [b])
-> [a]
-> [a]
-> [b]
-> [b]
filterWith' termList f f' xs = f' xs
filterWith' termList f f' xs = f' xs
$ map (extractTermsWithList' pats)
$ map (extractTermsWithList' pats)
...
...
src/Gargantext/Core/Types.hs
View file @
3b163685
{-|
{-|
Module : Gargantext.Types
Module : Gargantext.Types
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
@@ -17,7 +17,7 @@ commentary with @some markup@.
...
@@ -17,7 +17,7 @@ commentary with @some markup@.
module
Gargantext.Core.Types
(
module
Gargantext
.
Core
.
Types
.
Main
module
Gargantext.Core.Types
(
module
Gargantext
.
Core
.
Types
.
Main
,
module
Gargantext
.
Database
.
Admin
.
Types
.
Node
,
module
Gargantext
.
Database
.
Admin
.
Types
.
Node
,
DebugMode
(
..
),
withDebugMode
,
DebugMode
(
..
),
withDebugMode
,
Term
,
Terms
(
..
)
,
Term
,
Terms
(
..
)
,
TermsCount
,
TermsWithCount
,
TokenTag
(
..
),
POS
(
..
),
NER
(
..
)
,
TokenTag
(
..
),
POS
(
..
),
NER
(
..
)
,
Label
,
Stems
,
Label
,
Stems
,
HasInvalidError
(
..
),
assertValid
,
HasInvalidError
(
..
),
assertValid
...
@@ -70,10 +70,13 @@ type Label = [Text]
...
@@ -70,10 +70,13 @@ type Label = [Text]
data
Terms
=
Terms
{
_terms_label
::
Label
data
Terms
=
Terms
{
_terms_label
::
Label
,
_terms_stem
::
Stems
,
_terms_stem
::
Stems
}
deriving
(
Ord
,
Show
)
}
deriving
(
Ord
,
Show
)
instance
Eq
Terms
where
instance
Eq
Terms
where
(
==
)
(
Terms
_
s1
)
(
Terms
_
s2
)
=
s1
==
s2
(
==
)
(
Terms
_
s1
)
(
Terms
_
s2
)
=
s1
==
s2
type
TermsCount
=
Int
type
TermsWithCount
=
(
Terms
,
TermsCount
)
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Tag
=
POS
|
NER
data
Tag
=
POS
|
NER
deriving
(
Show
,
Eq
)
deriving
(
Show
,
Eq
)
...
@@ -208,5 +211,3 @@ data TODO = TODO
...
@@ -208,5 +211,3 @@ data TODO = TODO
instance
ToSchema
TODO
where
instance
ToSchema
TODO
where
instance
ToParamSchema
TODO
where
instance
ToParamSchema
TODO
where
----------------------------------------------------------------------------
----------------------------------------------------------------------------
src/Gargantext/Core/Utils.hs
View file @
3b163685
{-|
{-|
Module : Gargantext.Utils
Module : Gargantext.Utils
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
@@ -12,16 +12,19 @@ commentary with @some markup@.
...
@@ -12,16 +12,19 @@ commentary with @some markup@.
-}
-}
module
Gargantext.Core.Utils
(
module
Gargantext.Core.Utils
(
-- module Gargantext.Utils.Chronos
-- module Gargantext.Utils.Chronos
module
Gargantext
.
Core
.
Utils
.
Prefix
module
Gargantext
.
Core
.
Utils
.
Prefix
,
something
,
something
,
alphanum
,
alphanum
,
choices
,
choices
,
randomString
,
randomString
,
groupWithCounts
,
addTuples
)
where
)
where
import
Data.Char
(
chr
,
ord
)
import
Data.Char
(
chr
,
ord
)
import
qualified
Data.List
as
List
import
Data.Maybe
import
Data.Maybe
import
Data.Monoid
import
Data.Monoid
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
,
pack
)
...
@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
...
@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
randomString
num
=
do
randomString
num
=
do
str
<-
choices
num
alphanum
str
<-
choices
num
alphanum
pure
$
pack
str
pure
$
pack
str
-- | Given a list of items of type 'a', return list with unique items
-- (like List.nub) but tuple-d with their counts in the original list
groupWithCounts
::
(
Ord
a
,
Eq
a
)
=>
[
a
]
->
[(
a
,
Int
)]
groupWithCounts
=
map
f
.
List
.
group
.
List
.
sort
where
f
[]
=
panic
"[groupWithCounts] impossible"
f
ts
@
(
t
:
_
)
=
(
t
,
length
ts
)
addTuples
::
(
Num
a
,
Num
b
)
=>
(
a
,
b
)
->
(
a
,
b
)
->
(
a
,
b
)
addTuples
(
a1
,
b1
)
(
a2
,
b2
)
=
(
a1
+
a2
,
b1
+
b2
)
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
3b163685
...
@@ -65,7 +65,7 @@ flowPhylo cId = do
...
@@ -65,7 +65,7 @@ flowPhylo cId = do
patterns
=
buildPatterns
termList
patterns
=
buildPatterns
termList
-- | To filter the Ngrams of a document based on the termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
Date
,
Text
)
->
(
Date
,
[
Text
])
filterTerms
::
Patterns
->
(
Date
,
Text
)
->
(
Date
,
[
Text
])
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
termsInText
patterns'
d
)
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
fst
<$>
termsInText
patterns'
d
)
docs
=
map
((
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
docs'
docs
=
map
((
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
docs'
...
@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
...
@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
-- refactor 2021
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
src/Gargantext/Database/Action/Flow.hs
View file @
3b163685
...
@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
...
@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
where
import
Conduit
import
Conduit
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
,
over
,
traverse
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Either
import
Data.Either
...
@@ -60,7 +60,6 @@ import Data.Maybe (catMaybes)
...
@@ -60,7 +60,6 @@ import Data.Maybe (catMaybes)
import
Data.Monoid
import
Data.Monoid
import
Data.Swagger
import
Data.Swagger
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Servant.Client
(
ClientError
)
import
Servant.Client
(
ClientError
)
...
@@ -83,9 +82,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
...
@@ -83,9 +82,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Core.Types
(
POS
(
NP
))
import
Gargantext.Core.Types
(
POS
(
NP
)
,
TermsCount
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
import
Gargantext.Core.Types.Main
import
Gargantext.Core.Utils
(
addTuples
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.List
import
Gargantext.Database.Action.Flow.List
import
Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Types
...
@@ -284,7 +284,7 @@ flow c u cn la mfslw (mLength, docsC) logStatus = do
...
@@ -284,7 +284,7 @@ flow c u cn la mfslw (mLength, docsC) logStatus = do
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
pure
$
Prelude
.
head
id
pure
$
Prelude
.
head
id
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -357,25 +357,27 @@ insertMasterDocs c lang hs = do
...
@@ -357,25 +357,27 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
mapNgramsDocs'
::
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
<-
mapNodeIdNgrams
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
lId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure
ids'
pure
ids'
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
=>
ListId
=>
ListId
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
->
m
()
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"[saveDocNgramsWith] mapNgramsDocs'"
mapNgramsDocs'
--printDebug "terms2id" terms2id
let
mapNgramsDocsNoCount
=
over
(
traverse
.
traverse
.
traverse
)
fst
mapNgramsDocs'
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocsNoCount
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
@@ -392,7 +394,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
...
@@ -392,7 +394,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
<*>
Just
(
fromIntegral
w
::
Double
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
HashMap
.
toList
mapNgramsDocs
|
(
terms''
,
mapNgramsTypes
)
<-
HashMap
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
,
(
nId
,
(
w
,
_cnt
)
)
<-
Map
.
toList
mapNodeIdWeight
]
]
-- to be removed
-- to be removed
...
@@ -451,28 +453,32 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
...
@@ -451,28 +453,32 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
------------------------------------------------------------------------
------------------------------------------------------------------------
documentIdWithNgrams
::
HasNodeError
err
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
=>
(
a
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
)))
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
,
TermsCount
)))
->
[
Indexed
NodeId
a
]
->
[
Indexed
NodeId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
b
]
->
Cmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
where
where
toDocumentIdWithNgrams
d
=
do
toDocumentIdWithNgrams
d
=
do
e
<-
f
$
_unIndex
d
e
<-
f
$
_unIndex
d
pure
$
DocumentIdWithNgrams
d
e
pure
$
DocumentIdWithNgrams
d
e
-- | TODO check optimization
-- | TODO check optimization
mapNodeIdNgrams
::
(
Ord
b
,
Hashable
b
)
mapNodeIdNgrams
::
(
Ord
b
,
Hashable
b
)
=>
[
DocumentIdWithNgrams
a
b
]
=>
[
DocumentIdWithNgrams
a
b
]
->
HashMap
b
->
HashMap
b
(
Map
NgramsType
(
Map
NgramsType
(
Map
NodeId
Int
)
(
Map
NodeId
(
Int
,
TermsCount
)
)
)
)
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)
))
.
fmap
f
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
addTuples
))
.
fmap
f
where
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f
::
DocumentIdWithNgrams
a
b
f
::
DocumentIdWithNgrams
a
b
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
))
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
)
)
$
documentNgrams
d
f
d
=
fmap
(
\
(
ngramsTypeMap
,
cnt
)
->
fmap
(
\
i
->
Map
.
singleton
nId
(
i
,
cnt
))
ngramsTypeMap
)
$
documentNgrams
d
where
where
nId
=
_index
$
documentWithId
d
nId
=
_index
$
documentWithId
d
...
@@ -483,25 +489,25 @@ instance ExtractNgramsT HyperdataContact
...
@@ -483,25 +489,25 @@ instance ExtractNgramsT HyperdataContact
extractNgramsT
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
extractNgramsT
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
where
where
extract
::
TermType
Lang
->
HyperdataContact
extract
::
TermType
Lang
->
HyperdataContact
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extract
_l
hc'
=
do
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
(
Map
.
singleton
Authors
1
,
1
)
)
|
a'
<-
authors
]
instance
ExtractNgramsT
HyperdataDocument
instance
ExtractNgramsT
HyperdataDocument
where
where
extractNgramsT
::
TermType
Lang
extractNgramsT
::
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extractNgramsT
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
lang
hd
extractNgramsT
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
lang
hd
where
where
extractNgramsT'
::
TermType
Lang
extractNgramsT'
::
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extractNgramsT'
lang'
doc
=
do
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
maybe
"Nothing"
identity
...
@@ -515,23 +521,25 @@ instance ExtractNgramsT HyperdataDocument
...
@@ -515,23 +521,25 @@ instance ExtractNgramsT HyperdataDocument
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
_hd_authors
doc
$
_hd_authors
doc
terms'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
termsWithCounts'
<-
map
(
\
(
t
,
cnt
)
->
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
t
,
cnt
))
<$>
concat
<$>
concat
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
printDebug
"[extractNgramsT HyperdataDocument] termsWithCounts'"
termsWithCounts'
printDebug
"[extractNgramsT HyperdataDocument] termsWithLargerCounts"
$
filter
(
\
(
_
,
cnt
)
->
cnt
>
1
)
termsWithCounts'
pure
$
HashMap
.
fromList
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
Map
.
singleton
Sources
1
)
]
$
[(
SimpleNgrams
source
,
(
Map
.
singleton
Sources
1
,
1
)
)
]
<>
[(
SimpleNgrams
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
i'
,
(
Map
.
singleton
Institutes
1
,
1
)
)
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
SimpleNgrams
a'
,
(
Map
.
singleton
Authors
1
,
1
)
)
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
term
s'
]
<>
[(
EnrichedNgrams
t'
,
(
Map
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCount
s'
]
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
where
where
extractNgramsT
l
(
Node
_
_
_
_
_
_
_
h
)
=
extractNgramsT
l
h
extractNgramsT
l
(
Node
{
_node_hyperdata
=
h
}
)
=
extractNgramsT
l
h
instance
HasText
a
=>
HasText
(
Node
a
)
instance
HasText
a
=>
HasText
(
Node
a
)
where
where
hasText
(
Node
_
_
_
_
_
_
_
h
)
=
hasText
h
hasText
(
Node
{
_node_hyperdata
=
h
}
)
=
hasText
h
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
3b163685
...
@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils
...
@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams
import
Gargantext.Database.Query.Table.ContextNodeNgrams
...
@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
...
@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
data
DocumentIdWithNgrams
a
b
=
data
DocumentIdWithNgrams
a
b
=
DocumentIdWithNgrams
DocumentIdWithNgrams
{
documentWithId
::
Indexed
NodeId
a
{
documentWithId
::
Indexed
NodeId
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
)
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
,
TermsCount
)
}
deriving
(
Show
)
}
deriving
(
Show
)
insertDocNgrams
::
ListId
insertDocNgrams
::
ListId
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
Int
))
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
(
Int
,
TermsCount
)
))
->
Cmd
err
Int
->
Cmd
err
Int
insertDocNgrams
lId
m
=
insertContextNodeNgrams
ns
insertDocNgrams
lId
m
=
do
printDebug
"[insertDocNgrams] ns"
ns
insertContextNodeNgrams
ns
where
where
ns
=
[
ContextNodeNgrams
docId
lId
(
ng
^.
index
)
ns
=
[
ContextNodeNgrams
docId
lId
(
ng
^.
index
)
(
ngramsTypeId
t
)
(
ngramsTypeId
t
)
(
fromIntegral
i
)
(
fromIntegral
i
)
cnt
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
docId
,
i
)
<-
DM
.
toList
n2i
,
(
docId
,
(
i
,
cnt
)
)
<-
DM
.
toList
n2i
]
]
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
View file @
3b163685
...
@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
...
@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
-- | Insert utils
-- | Insert utils
insertContextNodeNgrams
::
[
ContextNodeNgrams
]
->
Cmd
err
Int
insertContextNodeNgrams
::
[
ContextNodeNgrams
]
->
Cmd
err
Int
insertContextNodeNgrams
=
insertContextNodeNgramsW
insertContextNodeNgrams
=
insertContextNodeNgramsW
.
map
(
\
(
ContextNodeNgrams
c
n
ng
nt
w
)
->
.
map
(
\
(
ContextNodeNgrams
c
n
ng
nt
w
dc
)
->
ContextNodeNgrams
(
pgContextId
c
)
ContextNodeNgrams
(
pgContextId
c
)
(
pgNodeId
n
)
(
pgNodeId
n
)
(
sqlInt4
ng
)
(
sqlInt4
ng
)
(
pgNgramsTypeId
nt
)
(
pgNgramsTypeId
nt
)
(
sqlDouble
w
)
(
sqlDouble
w
)
(
sqlInt4
dc
)
)
)
insertContextNodeNgramsW
::
[
ContextNodeNgramsWrite
]
->
Cmd
err
Int
insertContextNodeNgramsW
::
[
ContextNodeNgramsWrite
]
->
Cmd
err
Int
...
...
src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
View file @
3b163685
...
@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
...
@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
updateHyperdataQuery
::
ToJSON
a
=>
NodeId
->
a
->
Update
Int64
updateHyperdataQuery
::
ToJSON
a
=>
NodeId
->
a
->
Update
Int64
updateHyperdataQuery
i
h
=
seq
h'
$
trace
"updateHyperdataQuery: encoded JSON"
$
Update
updateHyperdataQuery
i
h
=
seq
h'
$
trace
"updateHyperdataQuery: encoded JSON"
$
Update
{
uTable
=
nodeTable
{
uTable
=
nodeTable
,
uUpdateWith
=
updateEasy
(
\
(
Node
_ni
_nh
_nt
_nu
_np
_nn
_nd
_h
)
,
uUpdateWith
=
updateEasy
(
\
(
Node
{
..
})
->
trace
"updating mate"
$
Node
_ni
_nh
_nt
_nu
_np
_nn
_nd
h'
->
Node
{
_node_hyperdata
=
h'
,
..
}
-- -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
)
)
,
uWhere
=
(
\
row
->
{-trace "uWhere" $-}
_node_id
row
.==
pgNodeId
i
)
,
uWhere
=
(
\
row
->
{-trace "uWhere" $-}
_node_id
row
.==
pgNodeId
i
)
,
uReturning
=
rCount
,
uReturning
=
rCount
...
@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
...
@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
updateNodesWithType_
nt
h
=
do
updateNodesWithType_
nt
h
=
do
ns
<-
getNodesIdWithType
nt
ns
<-
getNodesIdWithType
nt
mapM
(
\
n
->
updateHyperdata
n
h
)
ns
mapM
(
\
n
->
updateHyperdata
n
h
)
ns
src/Gargantext/Database/Schema/ContextNodeNgrams.hs
View file @
3b163685
...
@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
...
@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
where
where
import
Prelude
import
Prelude
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Ngrams
(
NgramsTypeId
,
NgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsTypeId
,
NgramsId
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
...
@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node
...
@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node
type
ContextNodeNgrams
=
type
ContextNodeNgrams
=
ContextNodeNgramsPoly
ContextId
ListId
NgramsId
NgramsTypeId
Double
ContextNodeNgramsPoly
ContextId
ListId
NgramsId
NgramsTypeId
Double
TermsCount
data
ContextNodeNgramsPoly
c
n
ngrams_id
ngt
w
data
ContextNodeNgramsPoly
c
n
ngrams_id
ngt
w
dc
=
ContextNodeNgrams
{
_cnng_context_id
::
!
c
=
ContextNodeNgrams
{
_cnng_context_id
::
!
c
,
_cnng_node_id
::
!
n
,
_cnng_node_id
::
!
n
,
_cnng_ngrams_id
::
!
ngrams_id
,
_cnng_ngrams_id
::
!
ngrams_id
,
_cnng_ngramsType
::
!
ngt
,
_cnng_ngramsType
::
!
ngt
,
_cnng_weight
::
!
w
,
_cnng_weight
::
!
w
,
_cnng_doc_count
::
!
dc
}
deriving
(
Show
)
}
deriving
(
Show
)
type
ContextNodeNgramsWrite
=
type
ContextNodeNgramsWrite
=
...
@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
...
@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlFloat8
)
(
Column
SqlFloat8
)
(
Column
SqlInt4
)
type
ContextNodeNgramsRead
=
type
ContextNodeNgramsRead
=
ContextNodeNgramsPoly
(
Column
SqlInt4
)
ContextNodeNgramsPoly
(
Column
SqlInt4
)
...
@@ -50,6 +53,7 @@ type ContextNodeNgramsRead =
...
@@ -50,6 +53,7 @@ type ContextNodeNgramsRead =
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlFloat8
)
(
Column
SqlFloat8
)
(
Column
SqlInt4
)
type
ContextNodeNgramsReadNull
=
type
ContextNodeNgramsReadNull
=
ContextNodeNgramsPoly
(
Column
(
Nullable
SqlInt4
))
ContextNodeNgramsPoly
(
Column
(
Nullable
SqlInt4
))
...
@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
...
@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlFloat8
))
(
Column
(
Nullable
SqlFloat8
))
(
Column
(
Nullable
SqlInt4
))
$
(
makeAdaptorAndInstance
"pContextNodeNgrams"
''
C
ontextNodeNgramsPoly
)
$
(
makeAdaptorAndInstance
"pContextNodeNgrams"
''
C
ontextNodeNgramsPoly
)
makeLenses
''
C
ontextNodeNgramsPoly
makeLenses
''
C
ontextNodeNgramsPoly
...
@@ -70,5 +75,6 @@ contextNodeNgramsTable = Table "context_node_ngrams"
...
@@ -70,5 +75,6 @@ contextNodeNgramsTable = Table "context_node_ngrams"
,
_cnng_ngrams_id
=
requiredTableField
"ngrams_id"
,
_cnng_ngrams_id
=
requiredTableField
"ngrams_id"
,
_cnng_ngramsType
=
requiredTableField
"ngrams_type"
,
_cnng_ngramsType
=
requiredTableField
"ngrams_type"
,
_cnng_weight
=
requiredTableField
"weight"
,
_cnng_weight
=
requiredTableField
"weight"
,
_cnng_doc_count
=
requiredTableField
"doc_count"
}
}
)
)
src/Gargantext/Database/Schema/NodeNgrams.hs
View file @
3b163685
{-|
{-|
Module : Gargantext.Database.Schema.NodeNgrams
Module : Gargantext.Database.Schema.NodeNgrams
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
@@ -86,7 +86,7 @@ type NgramsClass = Int
...
@@ -86,7 +86,7 @@ type NgramsClass = Int
type
NgramsText
=
Text
type
NgramsText
=
Text
-- Example of list Ngrams
-- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
type
NodeNgramsW
=
type
NodeNgramsW
=
NodeNgramsPoly
(
Maybe
NodeNgramsId
)
NodeId
ListType
NgramsText
NodeNgramsPoly
(
Maybe
NodeNgramsId
)
NodeId
ListType
NgramsText
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment