Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
98e64947
Commit
98e64947
authored
Dec 14, 2020
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Continue refactoring...
parent
d37798c1
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
196 additions
and
170 deletions
+196
-170
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+64
-44
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+1
-1
Export.hs
src/Gargantext/API/Node/Corpus/Export.hs
+2
-1
List.hs
src/Gargantext/Core/Text/List.hs
+5
-5
API.hs
src/Gargantext/Core/Viz/Graph/API.hs
+1
-1
Pairing.hs
src/Gargantext/Database/Action/Flow/Pairing.hs
+40
-39
Metrics.hs
src/Gargantext/Database/Action/Metrics.hs
+5
-5
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+62
-57
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+14
-16
Node.hs
src/Gargantext/Database/Admin/Types/Node.hs
+2
-1
No files found.
src/Gargantext/API/Ngrams/Tools.hs
View file @
98e64947
...
...
@@ -9,13 +9,18 @@ Portability : POSIX
-}
{-# LANGUAGE TypeFamilies #-}
module
Gargantext.API.Ngrams.Tools
where
import
Control.Concurrent
import
Control.Lens
(
_Just
,
(
^.
),
at
,
view
)
import
Control.Lens
(
_Just
,
(
^.
),
at
,
view
,
At
,
Index
,
IxValue
)
import
Control.Monad.Reader
import
Data.Hashable
(
Hashable
)
import
Data.HashMap.Strict
(
HashMap
)
import
qualified
Data.HashMap.Strict
as
HM
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Data.Map.Strict
(
Map
)
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
...
...
@@ -31,7 +36,7 @@ import Gargantext.Prelude
mergeNgramsElement
::
NgramsRepoElement
->
NgramsRepoElement
->
NgramsRepoElement
mergeNgramsElement
_neOld
neNew
=
neNew
type
RootTerm
=
Text
type
RootTerm
=
NgramsTerm
getRepo
::
RepoCmdM
env
err
m
=>
m
NgramsRepo
getRepo
=
do
...
...
@@ -39,8 +44,8 @@ getRepo = do
liftBase
$
readMVar
v
listNgramsFromRepo
::
[
ListId
]
->
NgramsType
->
NgramsRepo
->
Map
Text
NgramsRepoElement
listNgramsFromRepo
nodeIds
ngramsType
repo
=
Map
.
mapKeys
unNgramsTerm
ngrams
->
NgramsRepo
->
Map
NgramsTerm
NgramsRepoElement
listNgramsFromRepo
nodeIds
ngramsType
repo
=
ngrams
where
ngramsMap
=
repo
^.
r_state
.
at
ngramsType
.
_Just
...
...
@@ -53,73 +58,88 @@ listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams
-- be properly guarded.
getListNgrams
::
RepoCmdM
env
err
m
=>
[
ListId
]
->
NgramsType
->
m
(
Map
Text
NgramsRepoElement
)
->
m
(
Map
NgramsTerm
NgramsRepoElement
)
getListNgrams
nodeIds
ngramsType
=
listNgramsFromRepo
nodeIds
ngramsType
<$>
getRepo
getTermsWith
::
(
RepoCmdM
env
err
m
,
Ord
a
)
=>
(
Text
->
a
)
->
[
ListId
]
getTermsWith
::
(
RepoCmdM
env
err
m
,
Eq
a
,
Hashable
a
)
=>
(
NgramsTerm
->
a
)
->
[
ListId
]
->
NgramsType
->
ListType
->
m
(
Map
a
[
a
])
getTermsWith
f
ls
ngt
lt
=
Map
.
fromListWith
(
<>
)
<$>
map
(
toTreeWith
f
)
->
m
(
Hash
Map
a
[
a
])
getTermsWith
f
ls
ngt
lt
=
HM
.
fromListWith
(
<>
)
<$>
map
toTreeWith
<$>
Map
.
toList
<$>
Map
.
filter
(
\
f'
->
(
fst
f'
)
==
lt
)
<$>
Map
.
filter
(
\
f'
->
fst
f'
==
lt
)
<$>
mapTermListRoot
ls
ngt
<$>
getRepo
where
toTreeWith
f''
(
t
,
(
_lt
,
maybeRoot
))
=
case
maybeRoot
of
Nothing
->
(
f
''
t
,
[]
)
Just
r
->
(
f
''
r
,
map
f''
[
t
])
toTreeWith
(
t
,
(
_lt
,
maybeRoot
))
=
case
maybeRoot
of
Nothing
->
(
f
t
,
[]
)
Just
r
->
(
f
r
,
[
f
t
])
mapTermListRoot
::
[
ListId
]
->
NgramsType
->
NgramsRepo
->
Map
Text
(
ListType
,
(
Maybe
Text
)
)
->
Map
NgramsTerm
(
ListType
,
Maybe
NgramsTerm
)
mapTermListRoot
nodeIds
ngramsType
repo
=
Map
.
fromList
[
(
t
,
(
_nre_list
nre
,
unNgramsTerm
<$>
_nre_root
nre
))
|
(
t
,
nre
)
<-
Map
.
toList
ngrams
]
where
ngrams
=
listNgramsFromRepo
nodeIds
ngramsType
repo
(
\
nre
->
(
_nre_list
nre
,
_nre_root
nre
))
<$>
listNgramsFromRepo
nodeIds
ngramsType
repo
filterListWithRootHashMap
::
ListType
->
HashMap
NgramsTerm
(
ListType
,
Maybe
NgramsTerm
)
->
HashMap
NgramsTerm
(
Maybe
RootTerm
)
filterListWithRootHashMap
lt
m
=
snd
<$>
HM
.
filter
isMapTerm
m
where
isMapTerm
(
l
,
maybeRoot
)
=
case
maybeRoot
of
Nothing
->
l
==
lt
Just
r
->
case
HM
.
lookup
r
m
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: filterWithRoot, unknown key: "
<>
unNgramsTerm
r
Just
(
l'
,
_
)
->
l'
==
lt
filterListWithRoot
::
ListType
->
Map
Text
(
ListType
,
Maybe
Text
)
->
Map
Text
(
Maybe
RootTerm
)
filterListWithRoot
lt
m
=
Map
.
fromList
$
map
(
\
(
t
,(
_
,
r
))
->
(
t
,
r
))
$
filter
isMapTerm
(
Map
.
toList
m
)
->
Map
NgramsTerm
(
ListType
,
Maybe
NgramsTerm
)
->
Map
NgramsTerm
(
Maybe
RootTerm
)
filterListWithRoot
lt
m
=
snd
<$>
Map
.
filter
isMapTerm
m
where
isMapTerm
(
_t
,(
l
,
maybeRoot
)
)
=
case
maybeRoot
of
isMapTerm
(
l
,
maybeRoot
)
=
case
maybeRoot
of
Nothing
->
l
==
lt
Just
r
->
case
Map
.
lookup
r
m
of
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: filterWithRoot, unknown key: "
<>
r
Nothing
->
panic
$
"Garg.API.Ngrams.Tools: filterWithRoot, unknown key: "
<>
unNgramsTerm
r
Just
(
l'
,
_
)
->
l'
==
lt
groupNodesByNgrams
::
Map
Text
(
Maybe
RootTerm
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
NodeId
)
groupNodesByNgrams
syn
occs
=
Map
.
fromListWith
(
<>
)
occs'
groupNodesByNgrams
::
(
At
root_map
,
Index
root_map
~
NgramsTerm
,
IxValue
root_map
~
Maybe
RootTerm
)
=>
root_map
->
HashMap
NgramsTerm
(
Set
NodeId
)
->
HashMap
NgramsTerm
(
Set
NodeId
)
groupNodesByNgrams
syn
occs
=
HM
.
fromListWith
(
<>
)
occs'
where
occs'
=
map
toSyn
(
Map
.
toList
occs
)
toSyn
(
t
,
ns
)
=
case
Map
.
lookup
t
syn
of
Nothing
->
panic
$
"[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: "
<>
t
occs'
=
map
toSyn
(
HM
.
toList
occs
)
toSyn
(
t
,
ns
)
=
case
syn
^.
at
t
of
Nothing
->
panic
$
"[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: "
<>
unNgramsTerm
t
Just
r
->
case
r
of
Nothing
->
(
t
,
ns
)
Just
r'
->
(
r'
,
ns
)
data
Diagonal
=
Diagonal
Bool
getCoocByNgrams
::
Diagonal
->
Map
Text
(
Set
NodeId
)
->
Map
(
Text
,
Text
)
Int
getCoocByNgrams
::
Diagonal
->
HashMap
Text
(
Set
NodeId
)
->
Hash
Map
(
Text
,
Text
)
Int
getCoocByNgrams
=
getCoocByNgrams'
identity
getCoocByNgrams'
::
(
Ord
a
,
Ord
c
)
=>
(
b
->
Set
c
)
->
Diagonal
->
Map
a
b
->
Map
(
a
,
a
)
Int
getCoocByNgrams'
::
(
Hashable
a
,
Ord
a
,
Ord
c
)
=>
(
b
->
Set
c
)
->
Diagonal
->
HashMap
a
b
->
Hash
Map
(
a
,
a
)
Int
getCoocByNgrams'
f
(
Diagonal
diag
)
m
=
Map
.
fromList
[(
(
t1
,
t2
)
HM
.
fromList
[(
(
t1
,
t2
)
,
maybe
0
Set
.
size
$
Set
.
intersection
<$>
(
fmap
f
$
Map
.
lookup
t1
m
)
<*>
(
fmap
f
$
Map
.
lookup
t2
m
)
)
|
(
t1
,
t2
)
<-
case
diag
of
True
->
[
(
x
,
y
)
|
x
<-
Map
.
keys
m
,
y
<-
Map
.
keys
m
,
x
<=
y
]
False
->
listToCombi
identity
(
Map
.
keys
m
)
<$>
(
fmap
f
$
HM
.
lookup
t1
m
)
<*>
(
fmap
f
$
HM
.
lookup
t2
m
)
)
|
(
t1
,
t2
)
<-
if
diag
then
[
(
x
,
y
)
|
x
<-
ks
,
y
<-
ks
,
x
<=
y
]
-- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi
identity
ks
]
where
ks
=
HM
.
keys
m
\ No newline at end of file
src/Gargantext/API/Ngrams/Types.hs
View file @
98e64947
...
...
@@ -124,7 +124,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype
NgramsTerm
=
NgramsTerm
{
unNgramsTerm
::
Text
}
deriving
(
Ord
,
Eq
,
Show
,
Generic
,
ToJSONKey
,
ToJSON
,
FromJSON
,
Semigroup
,
Arbitrary
,
Serialise
,
ToSchema
)
deriving
(
Ord
,
Eq
,
Show
,
Generic
,
ToJSONKey
,
ToJSON
,
FromJSON
,
Semigroup
,
Arbitrary
,
Serialise
,
ToSchema
,
Hashable
)
instance
FromJSONKey
NgramsTerm
where
fromJSONKey
=
FromJSONKeyTextParser
$
\
t
->
pure
$
NgramsTerm
$
strip
t
...
...
src/Gargantext/API/Node/Corpus/Export.hs
View file @
98e64947
...
...
@@ -17,6 +17,7 @@ module Gargantext.API.Node.Corpus.Export
where
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Set
(
Set
)
...
...
@@ -76,7 +77,7 @@ getNodeNgrams :: HasNodeError err
->
Maybe
ListId
->
NgramsType
->
NgramsRepo
->
Cmd
err
(
Map
NodeId
(
Set
Text
))
->
Cmd
err
(
Hash
Map
NodeId
(
Set
Text
))
getNodeNgrams
cId
lId'
nt
repo
=
do
lId
<-
case
lId'
of
Nothing
->
defaultList
cId
...
...
src/Gargantext/Core/Text/List.hs
View file @
98e64947
...
...
@@ -182,19 +182,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
selectedTerms
let
groupedTreeScores_SetNodeId
::
Map
Text
(
GroupedTreeScores
(
Set
NodeId
))
groupedTreeScores_SetNodeId
::
Hash
Map
Text
(
GroupedTreeScores
(
Set
NodeId
))
groupedTreeScores_SetNodeId
=
setScoresWithMap
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
-- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let
mapCooc
=
Map
.
filter
(
>
2
)
$
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
let
mapCooc
=
HM
.
filter
(
>
2
)
$
HM
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
viewScores
mapStemNodeIds
=
HM
.
toList
$
HM
.
map
viewScores
$
groupedTreeScores_SetNodeId
let
-- computing scores
...
...
src/Gargantext/Core/Viz/Graph/API.hs
View file @
98e64947
...
...
@@ -150,7 +150,7 @@ computeGraph cId d nt repo = do
let
ngs
=
filterListWithRoot
MapTerm
$
mapTermListRoot
[
lId
]
nt
repo
-- TODO split diagonal
myCooc
<-
Map
.
filter
(
>
1
)
myCooc
<-
HM
.
filter
(
>
1
)
<$>
getCoocByNgrams
(
Diagonal
True
)
<$>
groupNodesByNgrams
ngs
<$>
getNodesByNgramsOnlyUser
cId
(
lIds
<>
[
lId
])
nt
(
Map
.
keys
ngs
)
...
...
src/Gargantext/Database/Action/Flow/Pairing.hs
View file @
98e64947
...
...
@@ -17,13 +17,14 @@ module Gargantext.Database.Action.Flow.Pairing
where
import
Control.Lens
(
_Just
,
(
^.
))
import
Data.Map
(
Map
,
fromList
,
fromListWith
)
import
Data.HashMap.Strict
(
HashMap
)
import
qualified
Data.HashMap.Strict
as
HM
import
Data.Maybe
(
catMaybes
,
fromMaybe
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Gargantext.API.Ngrams.Tools
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.API.Prelude
(
GargNoServer
)
import
Gargantext.Core.Types
(
TableResult
(
..
)
,
Term
)
import
Gargantext.Core.Types
(
TableResult
(
..
))
import
Gargantext.Core.Types.Main
import
Gargantext.Database
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsOnlyUser
)
...
...
@@ -79,7 +80,7 @@ dataPairing :: AnnuaireId
->
(
CorpusId
,
ListId
,
NgramsType
)
->
(
ContactName
->
Projected
)
->
(
DocAuthor
->
Projected
)
->
GargNoServer
(
Map
ContactId
(
Set
DocId
))
->
GargNoServer
(
Hash
Map
ContactId
(
Set
DocId
))
dataPairing
aId
(
cId
,
lId
,
ngt
)
fc
fa
=
do
mc
<-
getNgramsContactId
aId
md
<-
getNgramsDocId
cId
lId
ngt
...
...
@@ -87,14 +88,14 @@ dataPairing aId (cId, lId, ngt) fc fa = do
printDebug
"ngramsContactId"
mc
printDebug
"ngramsDocId"
md
let
from
=
projectionFrom
(
Set
.
fromList
$
Map
.
keys
mc
)
fc
to
=
projectionTo
(
Set
.
fromList
$
Map
.
keys
md
)
fa
from
=
projectionFrom
(
Set
.
fromList
$
HM
.
keys
mc
)
fc
to
=
projectionTo
(
Set
.
fromList
$
HM
.
keys
md
)
fa
pure
$
fusion
mc
$
align
from
to
md
prepareInsert
::
Map
ContactId
(
Set
DocId
)
->
[
NodeNode
]
prepareInsert
::
Hash
Map
ContactId
(
Set
DocId
)
->
[
NodeNode
]
prepareInsert
m
=
map
(
\
(
n1
,
n2
)
->
NodeNode
n1
n2
Nothing
Nothing
)
$
List
.
concat
$
map
(
\
(
contactId
,
setDocIds
)
...
...
@@ -102,21 +103,21 @@ prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
->
(
contactId
,
setDocId
)
)
$
Set
.
toList
setDocIds
)
$
Map
.
toList
m
$
HM
.
toList
m
------------------------------------------------------------------------
type
ContactName
=
Text
type
DocAuthor
=
Text
type
Projected
=
Text
type
ContactName
=
NgramsTerm
type
DocAuthor
=
NgramsTerm
type
Projected
=
NgramsTerm
projectionFrom
::
Set
ContactName
->
(
ContactName
->
Projected
)
->
Map
ContactName
Projected
projectionFrom
ss
f
=
fromList
$
map
(
\
s
->
(
s
,
f
s
))
(
Set
.
toList
ss
)
projectionFrom
::
Set
ContactName
->
(
ContactName
->
Projected
)
->
Hash
Map
ContactName
Projected
projectionFrom
ss
f
=
HM
.
fromList
$
map
(
\
s
->
(
s
,
f
s
))
(
Set
.
toList
ss
)
-- use HS.toMap
projectionTo
::
Set
DocAuthor
->
(
DocAuthor
->
Projected
)
->
Map
Projected
(
Set
DocAuthor
)
projectionTo
ss
f
=
fromListWith
(
<>
)
$
map
(
\
s
->
(
f
s
,
Set
.
singleton
s
))
(
Set
.
toList
ss
)
projectionTo
::
Set
DocAuthor
->
(
DocAuthor
->
Projected
)
->
Hash
Map
Projected
(
Set
DocAuthor
)
projectionTo
ss
f
=
HM
.
fromListWith
(
<>
)
$
map
(
\
s
->
(
f
s
,
Set
.
singleton
s
))
(
Set
.
toList
ss
)
-- use HS.toMap
------------------------------------------------------------------------
takeName
::
Term
->
Term
takeName
texte
=
DT
.
toLower
texte'
takeName
::
NgramsTerm
->
Ngrams
Term
takeName
(
NgramsTerm
texte
)
=
NgramsTerm
$
DT
.
toLower
texte'
where
texte'
=
maybe
texte
(
\
x
->
if
DT
.
length
x
>
3
then
x
else
texte
)
(
lastName'
texte
)
...
...
@@ -124,51 +125,51 @@ takeName texte = DT.toLower texte'
------------------------------------------------------------------------
align
::
Map
ContactName
Projected
->
Map
Projected
(
Set
DocAuthor
)
->
Map
DocAuthor
(
Set
DocId
)
->
Map
ContactName
(
Set
DocId
)
align
mc
ma
md
=
fromListWith
(
<>
)
align
::
Hash
Map
ContactName
Projected
->
Hash
Map
Projected
(
Set
DocAuthor
)
->
Hash
Map
DocAuthor
(
Set
DocId
)
->
Hash
Map
ContactName
(
Set
DocId
)
align
mc
ma
md
=
HM
.
fromListWith
(
<>
)
$
map
(
\
c
->
(
c
,
getProjection
md
$
testProjection
c
mc
ma
))
$
Map
.
keys
mc
$
HM
.
keys
mc
where
getProjection
::
Map
DocAuthor
(
Set
DocId
)
->
Set
DocAuthor
->
Set
DocId
getProjection
::
Hash
Map
DocAuthor
(
Set
DocId
)
->
Set
DocAuthor
->
Set
DocId
getProjection
ma'
sa'
=
if
Set
.
null
sa'
then
Set
.
empty
else
Set
.
unions
$
sets
ma'
sa'
where
sets
ma''
sa''
=
Set
.
map
(
\
s
->
lookup
s
ma''
)
sa''
lookup
s'
ma''
=
fromMaybe
Set
.
empty
(
Map
.
lookup
s'
ma''
)
lookup
s'
ma''
=
fromMaybe
Set
.
empty
(
HM
.
lookup
s'
ma''
)
testProjection
::
ContactName
->
Map
ContactName
Projected
->
Map
Projected
(
Set
DocAuthor
)
->
Hash
Map
ContactName
Projected
->
Hash
Map
Projected
(
Set
DocAuthor
)
->
Set
DocAuthor
testProjection
cn'
mc'
ma'
=
case
Map
.
lookup
cn'
mc'
of
testProjection
cn'
mc'
ma'
=
case
HM
.
lookup
cn'
mc'
of
Nothing
->
Set
.
empty
Just
c
->
case
Map
.
lookup
c
ma'
of
Just
c
->
case
HM
.
lookup
c
ma'
of
Nothing
->
Set
.
empty
Just
a
->
a
fusion
::
Map
ContactName
(
Set
ContactId
)
->
Map
ContactName
(
Set
DocId
)
->
Map
ContactId
(
Set
DocId
)
fusion
mc
md
=
Map
.
fromListWith
(
<>
)
fusion
::
Hash
Map
ContactName
(
Set
ContactId
)
->
Hash
Map
ContactName
(
Set
DocId
)
->
Hash
Map
ContactId
(
Set
DocId
)
fusion
mc
md
=
HM
.
fromListWith
(
<>
)
$
catMaybes
$
[
(,)
<$>
Just
cId
<*>
Map
.
lookup
cn
md
|
(
cn
,
setContactId
)
<-
Map
.
toList
mc
$
[
(,)
<$>
Just
cId
<*>
HM
.
lookup
cn
md
|
(
cn
,
setContactId
)
<-
HM
.
toList
mc
,
cId
<-
Set
.
toList
setContactId
]
------------------------------------------------------------------------
getNgramsContactId
::
AnnuaireId
->
Cmd
err
(
Map
ContactName
(
Set
NodeId
))
->
Cmd
err
(
Hash
Map
ContactName
(
Set
NodeId
))
getNgramsContactId
aId
=
do
contacts
<-
getAllContacts
aId
pure
$
fromListWith
(
<>
)
pure
$
HM
.
fromListWith
(
<>
)
$
catMaybes
$
map
(
\
contact
->
(,)
<$>
contact
^.
(
node_hyperdata
.
hc_who
.
_Just
.
cw_lastName
)
$
map
(
\
contact
->
(,)
<$>
(
NgramsTerm
<$>
contact
^.
(
node_hyperdata
.
hc_who
.
_Just
.
cw_lastName
)
)
<*>
Just
(
Set
.
singleton
(
contact
^.
node_id
))
)
(
tr_docs
contacts
)
...
...
@@ -176,7 +177,7 @@ getNgramsContactId aId = do
getNgramsDocId
::
CorpusId
->
ListId
->
NgramsType
->
GargNoServer
(
Map
DocAuthor
(
Set
NodeId
))
->
GargNoServer
(
Hash
Map
DocAuthor
(
Set
NodeId
))
getNgramsDocId
cId
lId
nt
=
do
repo
<-
getRepo
lIds
<-
selectNodesWithUsername
NodeList
userMaster
...
...
src/Gargantext/Database/Action/Metrics.hs
View file @
98e64947
...
...
@@ -44,7 +44,7 @@ getNgramsCooc :: (FlowCmdM env err m)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
->
m
(
Map
Text
(
ListType
,
Maybe
Text
)
,
Map
Text
(
Maybe
RootTerm
)
,
Map
(
Text
,
Text
)
Int
,
Hash
Map
(
Text
,
Text
)
Int
)
getNgramsCooc
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
...
...
@@ -56,7 +56,7 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
lId
<-
defaultList
cId
lIds
<-
selectNodesWithUsername
NodeList
userMaster
myCooc
<-
Map
.
filter
(
>
1
)
<$>
getCoocByNgrams
(
Diagonal
True
)
myCooc
<-
HM
.
filter
(
>
1
)
<$>
getCoocByNgrams
(
Diagonal
True
)
<$>
groupNodesByNgrams
ngs
<$>
getNodesByNgramsOnlyUser
cId
(
lIds
<>
[
lId
])
(
ngramsTypeFromTabType
tabType
)
(
take'
maybeLimit
$
Map
.
keys
ngs
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
98e64947
...
...
@@ -17,50 +17,51 @@ module Gargantext.Database.Action.Metrics.NgramsByNode
where
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
)
import
Data.Map.Strict.Patch
(
PatchMap
,
Replace
,
diff
)
import
Data.HashMap.Strict
(
HashMap
)
import
qualified
Data.HashMap.Strict
as
HM
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Data.Tuple.Extra
(
first
,
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Debug.Trace
(
trace
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Database.Admin.Config
(
nodeTypeId
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Database.PostgreSQL.Simple
as
DPS
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
(
Double
,
Map
Text
(
Double
,
Set
Text
))
countNodesByNgramsWith
::
(
NgramsTerm
->
NgramsTerm
)
->
HashMap
NgramsTerm
(
Set
NodeId
)
->
(
Double
,
HashMap
NgramsTerm
(
Double
,
Set
NgramsTerm
))
countNodesByNgramsWith
f
m
=
(
total
,
m'
)
where
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
elems
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
HM
.
elems
m
m'
=
HM
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
groupNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
Text
,
Set
NodeId
)
groupNodesByNgramsWith
::
(
NgramsTerm
->
NgramsTerm
)
->
HashMap
NgramsTerm
(
Set
NodeId
)
->
HashMap
NgramsTerm
(
Set
NgramsTerm
,
Set
NodeId
)
groupNodesByNgramsWith
f
m
=
fromListWith
(
<>
)
$
map
(
\
(
t
,
ns
)
->
(
f
t
,
(
Set
.
singleton
t
,
ns
)))
$
toList
m
HM
.
fromListWith
(
<>
)
$
map
(
\
(
t
,
ns
)
->
(
f
t
,
(
Set
.
singleton
t
,
ns
)))
$
HM
.
toList
m
------------------------------------------------------------------------
getNodesByNgramsUser
::
CorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
Cmd
err
(
HashMap
NgramsTerm
(
Set
NodeId
))
getNodesByNgramsUser
cId
nt
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
HM
.
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
NgramsTerm
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeUser
cId
nt
where
...
...
@@ -95,19 +96,19 @@ getNodesByNgramsUser cId nt =
-- TODO add groups
getOccByNgramsOnlyFast
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast
cId
nt
ngs
=
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
getOccByNgramsOnlyFast'
::
CorpusId
->
ListId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
trace
(
show
(
cId
,
lId
))
$
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
HM
.
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
...
...
@@ -115,10 +116,10 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
run
::
CorpusId
->
ListId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Double
)]
run
cId'
lId'
nt'
tms'
=
runPGSQuery
query
(
Values
fields
(
DPS
.
Only
<$>
tms'
)
->
[
NgramsTerm
]
->
Cmd
err
[(
NgramsTerm
,
Double
)]
run
cId'
lId'
nt'
tms'
=
fmap
(
first
NgramsTerm
)
<$>
runPGSQuery
query
(
Values
fields
(
(
DPS
.
Only
.
unNgramsTerm
)
<$>
tms'
)
,
cId'
,
lId'
,
ngramsTypeId
nt'
...
...
@@ -143,10 +144,10 @@ getOccByNgramsOnlySlow :: NodeType
->
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlySlow
t
cId
ls
nt
ngs
=
Map
.
map
Set
.
size
<$>
getScore'
t
cId
ls
nt
ngs
HM
.
map
Set
.
size
<$>
getScore'
t
cId
ls
nt
ngs
where
getScore'
NodeCorpus
=
getNodesByNgramsOnlyUser
getScore'
NodeDocument
=
getNgramsByDocOnlyUser
...
...
@@ -155,25 +156,27 @@ getOccByNgramsOnlySlow t cId ls nt ngs =
getOccByNgramsOnlySafe
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlySafe
cId
ls
nt
ngs
=
do
printDebug
"getOccByNgramsOnlySafe"
(
cId
,
nt
,
length
ngs
)
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
(
HM
.
difference
slow
fast
,
HM
.
difference
fast
slow
)
-- diff slow fast :: PatchMap Text (Replace (Maybe Int))
pure
slow
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
->
[
NgramsTerm
]
->
Cmd
err
[(
NgramsTerm
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
tms
=
fmap
(
first
NgramsTerm
)
<$>
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
(
DPS
.
Only
.
unNgramsTerm
)
<$>
tms
)
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
...
...
@@ -218,11 +221,11 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
getNodesByNgramsOnlyUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
unionsWith
(
<>
)
.
map
(
HM
.
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
<$>
mapM
(
selectNgramsOnlyByNodeUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
...
...
@@ -231,11 +234,11 @@ getNodesByNgramsOnlyUser cId ls nt ngs =
getNgramsByNodeOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
NodeId
(
Set
Text
))
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NodeId
(
Set
NgramsTerm
))
getNgramsByNodeOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
unionsWith
(
<>
)
.
map
(
HM
.
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
.
map
(
map
swap
)
<$>
mapM
(
selectNgramsOnlyByNodeUser
cId
ls
nt
)
...
...
@@ -245,11 +248,12 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
->
[
NgramsTerm
]
->
Cmd
err
[(
NgramsTerm
,
NodeId
)]
selectNgramsOnlyByNodeUser
cId
ls
nt
tms
=
fmap
(
first
NgramsTerm
)
<$>
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
(
DPS
.
Only
.
unNgramsTerm
)
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
...
...
@@ -312,22 +316,23 @@ queryNgramsOnlyByNodeUser' = [sql|
getNgramsByDocOnlyUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
unionsWith
(
<>
)
.
map
(
HM
.
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
->
[
NgramsTerm
]
->
Cmd
err
[(
NgramsTerm
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
fmap
(
first
NgramsTerm
)
<$>
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
(
DPS
.
Only
.
unNgramsTerm
)
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
...
...
@@ -352,9 +357,9 @@ queryNgramsOnlyByDocUser = [sql|
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsMaster
ucId
mcId
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
)))
getNodesByNgramsMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Hash
Map
Text
(
Set
NodeId
))
getNodesByNgramsMaster
ucId
mcId
=
unionsWith
(
<>
)
.
map
(
HM
.
fromListWith
(
<>
)
.
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
)))
-- . takeWhile (not . List.null)
-- . takeWhile (\l -> List.length l > 3)
<$>
mapM
(
selectNgramsByNodeMaster
1000
ucId
mcId
)
[
0
,
500
..
10000
]
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
View file @
98e64947
...
...
@@ -16,7 +16,8 @@ module Gargantext.Database.Action.Metrics.TFICF
-- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..))
import
Data.Map.Strict
(
Map
,
toList
,
fromList
)
import
Data.HashMap.Strict
(
HashMap
)
import
qualified
Data.HashMap.Strict
as
HM
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
)
import
Gargantext.Core.Text.Metrics.TFICF
...
...
@@ -25,31 +26,28 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectCountDocs
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.API.Ngrams.Types
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
Double
)
->
Cmd
err
(
HashMap
NgramsTerm
Double
)
getTficf
cId
mId
nt
=
do
mapTextDoubleLocal
<-
Map
.
filter
(
>
1
)
<$>
Map
.
map
(
fromIntegral
.
Set
.
size
)
mapTextDoubleLocal
<-
HM
.
filter
(
>
1
)
<$>
HM
.
map
(
fromIntegral
.
Set
.
size
)
<$>
getNodesByNgramsUser
cId
nt
mapTextDoubleGlobal
<-
Map
.
map
fromIntegral
<$>
getOccByNgramsOnlyFast
mId
nt
(
Map
.
keys
mapTextDoubleLocal
)
mapTextDoubleGlobal
<-
HM
.
map
fromIntegral
<$>
getOccByNgramsOnlyFast
mId
nt
(
HM
.
keys
mapTextDoubleLocal
)
countLocal
<-
selectCountDocs
cId
countGlobal
<-
selectCountDocs
mId
pure
$
fromList
[
(
t
,
tficf
(
TficfInfra
(
Count
n
)
(
Total
$
fromIntegral
countLocal
))
(
TficfSupra
(
Count
$
fromMaybe
0
$
Map
.
lookup
t
mapTextDoubleGlobal
)
pure
$
HM
.
mapWithKey
(
\
t
n
->
tficf
(
TficfInfra
(
Count
n
)
(
Total
$
fromIntegral
countLocal
))
(
TficfSupra
(
Count
$
fromMaybe
0
$
HM
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
|
(
t
,
n
)
<-
toList
mapTextDoubleLocal
]
)
mapTextDoubleLocal
\ No newline at end of file
src/Gargantext/Database/Admin/Types/Node.hs
View file @
98e64947
...
...
@@ -24,6 +24,7 @@ import Control.Monad (mzero)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
import
Data.Hashable
(
Hashable
)
import
Data.Swagger
import
Data.Text
(
Text
,
unpack
)
import
Data.Time
(
UTCTime
)
...
...
@@ -130,7 +131,7 @@ pgNodeId = O.pgInt4 . id2int
------------------------------------------------------------------------
newtype
NodeId
=
NodeId
Int
deriving
(
Show
,
Read
,
Generic
,
Num
,
Eq
,
Ord
,
Enum
,
ToJSONKey
,
FromJSONKey
,
ToJSON
,
FromJSON
)
deriving
(
Show
,
Read
,
Generic
,
Num
,
Eq
,
Ord
,
Enum
,
ToJSONKey
,
FromJSONKey
,
ToJSON
,
FromJSON
,
Hashable
)
unNodeId
::
NodeId
->
Int
unNodeId
(
NodeId
n
)
=
n
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment