Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4838c6b8
Commit
4838c6b8
authored
Aug 31, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM + FIX] TFICF
parent
e93236e8
Pipeline
#1026
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
85 additions
and
66 deletions
+85
-66
List.hs
src/Gargantext/Core/Text/List.hs
+8
-7
TFICF.hs
src/Gargantext/Core/Text/Metrics/TFICF.hs
+4
-5
Legend.hs
src/Gargantext/Core/Viz/Graph/Legend.hs
+2
-1
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-48
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+55
-0
NodeNode.hs
src/Gargantext/Database/Query/Table/NodeNode.hs
+15
-1
Root.hs
src/Gargantext/Database/Query/Tree/Root.hs
+0
-4
No files found.
src/Gargantext/Core/Text/List.hs
View file @
4838c6b8
...
...
@@ -15,13 +15,14 @@ module Gargantext.Core.Text.List
-- import Data.Either (partitionEithers, Either(..))
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
,
empty
)
import
Data.Text
(
Text
)
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
RootParent
(
..
),
mSetFromList
)
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
{-ngramsGroup,-}
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Core.Text.Metrics.TFICF
(
sortTficf
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
...
...
@@ -138,8 +139,8 @@ buildNgramsTermsList :: Lang
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
Down
<$>
getTficf
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
buildNgramsTermsList
_l
_n
_
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
Down
<$>
getTficf
uCid
mCid
NgramsTerms
let
candidatesSize
=
400
...
...
@@ -150,12 +151,12 @@ buildNgramsTermsList l n m s uCid mCid = do
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
termList
=
termList
=
-- (toTermList a b ((isStopTerm s) . fst) candidatesHead)
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesHead
)
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
ngs
=
List
.
concat
$
map
toNgramsElement
termList
ngs
=
List
.
concat
$
map
toNgramsElement
$
map
(
\
(
lt
,
(
t
,
d
))
->
(
lt
,
((
t
,
(
d
,
empty
)))))
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
src/Gargantext/Core/Text/Metrics/TFICF.hs
View file @
4838c6b8
...
...
@@ -25,7 +25,6 @@ module Gargantext.Core.Text.Metrics.TFICF ( TFICF
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Data.Set
(
Set
)
import
Gargantext.Core.Types
(
Ordering
(
..
))
import
Data.Map.Strict
(
Map
,
toList
)
import
qualified
Data.Ord
as
DO
(
Down
(
..
))
...
...
@@ -54,8 +53,8 @@ tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"
sortTficf
::
Ordering
->
(
Map
Text
(
Double
,
Set
Text
))
->
[
(
Text
,(
Double
,
Set
Text
)
)]
sortTficf
Down
=
List
.
sortOn
(
DO
.
Down
.
fst
.
snd
)
.
toList
sortTficf
Up
=
List
.
sortOn
(
fst
.
snd
)
.
toList
->
Map
Text
Double
->
[
(
Text
,
Double
)]
sortTficf
Down
=
List
.
sortOn
(
DO
.
Down
.
snd
)
.
toList
sortTficf
Up
=
List
.
sortOn
snd
.
toList
src/Gargantext/Core/Viz/Graph/Legend.hs
View file @
4838c6b8
...
...
@@ -24,7 +24,6 @@ import Gargantext.Core.Viz.Graph.Louvain (LouvainNodeId, CommunityId, comId2node
[LouvainNode] -> Map CommunityId LouvainNodeId
[(CommunityId, [LouvainNodeId])]
sort by length LouvainNodeIds
...
...
@@ -39,6 +38,8 @@ subgraph with [LouvainNodeId]
Map NodeId Label
-> map [LouvainNodeId] -> [(CommunityId, take 3 [Label])]
use specGen incExc score to order the labels
take 7 [(CommunityId, take 3 [Label])]
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
4838c6b8
...
...
@@ -16,7 +16,7 @@ Ngrams by node enable contextual metrics.
module
Gargantext.Database.Action.Metrics.NgramsByNode
where
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
,
fromList
)
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
)
import
Data.Map.Strict.Patch
(
PatchMap
,
Replace
,
diff
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
...
...
@@ -30,7 +30,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
)
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
...
...
@@ -56,52 +55,6 @@ ngramsGroup l _m _n = Text.intercalate " "
.
Text
.
replace
"-"
" "
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
u
m
nt
f
=
do
u'
<-
Map
.
filter
(
\
s
->
Set
.
size
s
>
1
)
<$>
getNodesByNgramsUser
u
nt
m'
<-
Map
.
filter
(
\
s
->
Set
.
size
s
>
1
)
<$>
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
{-
getTficfWith :: UserCorpusId
-> MasterCorpusId
-> [ListId]
-> NgramsType
-> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m
let f x = case Map.lookup x mtxt of
Nothing -> x
Just x' -> maybe x identity x'
pure $ toTficfData (countNodesByNgramsWith f u') (countNodesByNgramsWith f m')
-}
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
(
TficfSupra
(
Count
$
maybe
0
fst
$
Map
.
lookup
t
ms
)(
Total
ts
))
,
ns
)
)
|
(
t
,
(
n
,
ns
))
<-
toList
mi
]
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
0 → 100644
View file @
4838c6b8
{-|
Module : Gargantext.Database.Metrics.TFICF
Description : Ngrams by Node user and master
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Action.Metrics.TFICF
where
-- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..))
import
Data.Map.Strict
(
Map
,
toList
,
fromList
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
)
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectCountDocs
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
Double
)
getTficf
cId
mId
nt
=
do
mapTextDoubleLocal
<-
Map
.
filter
(
>
1
)
<$>
Map
.
map
(
fromIntegral
.
Set
.
size
)
<$>
getNodesByNgramsUser
cId
nt
mapTextDoubleGlobal
<-
Map
.
map
fromIntegral
<$>
getOccByNgramsOnlyFast
mId
nt
(
Map
.
keys
mapTextDoubleLocal
)
countLocal
<-
selectCountDocs
cId
countGlobal
<-
selectCountDocs
mId
pure
$
fromList
[
(
t
,
tficf
(
TficfInfra
(
Count
n
)
(
Total
$
fromIntegral
countLocal
))
(
TficfSupra
(
Count
$
fromMaybe
0
$
Map
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
|
(
t
,
n
)
<-
toList
mapTextDoubleLocal
]
src/Gargantext/Database/Query/Table/NodeNode.hs
View file @
4838c6b8
...
...
@@ -29,6 +29,7 @@ module Gargantext.Database.Query.Table.NodeNode
,
insertNodeNode
,
deleteNodeNode
,
selectPublicNodes
,
selectCountDocs
)
where
...
...
@@ -145,7 +146,20 @@ nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
|]
------------------------------------------------------------------------
-- | TODO use UTCTime fast
selectCountDocs
::
CorpusId
->
Cmd
err
Int
selectCountDocs
cId
=
runCountOpaQuery
(
queryCountDocs
cId
)
where
queryCountDocs
cId'
=
proc
()
->
do
(
n
,
nn
)
<-
joinInCorpus
-<
()
restrict
-<
nn
^.
nn_node1_id
.==
(
toNullable
$
pgNodeId
cId'
)
restrict
-<
nn
^.
nn_category
.>=
(
toNullable
$
pgInt4
1
)
restrict
-<
n
^.
node_typename
.==
(
pgInt4
$
nodeTypeId
NodeDocument
)
returnA
-<
n
-- | TODO use UTCTime fast
selectDocsDates
::
CorpusId
->
Cmd
err
[
Text
]
selectDocsDates
cId
=
map
(
head'
"selectDocsDates"
.
splitOn
"-"
)
<$>
catMaybes
...
...
src/Gargantext/Database/Query/Tree/Root.hs
View file @
4838c6b8
...
...
@@ -96,10 +96,6 @@ getOrMk_RootWithCorpus user cName c = do
pure
(
userId
,
rootId
,
corpusId
)
mkRoot
::
HasNodeError
err
=>
User
->
Cmd
err
[
RootId
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment