Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4838c6b8
Commit
4838c6b8
authored
Aug 31, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM + FIX] TFICF
parent
e93236e8
Pipeline
#1026
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
85 additions
and
66 deletions
+85
-66
List.hs
src/Gargantext/Core/Text/List.hs
+8
-7
TFICF.hs
src/Gargantext/Core/Text/Metrics/TFICF.hs
+4
-5
Legend.hs
src/Gargantext/Core/Viz/Graph/Legend.hs
+2
-1
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-48
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+55
-0
NodeNode.hs
src/Gargantext/Database/Query/Table/NodeNode.hs
+15
-1
Root.hs
src/Gargantext/Database/Query/Tree/Root.hs
+0
-4
No files found.
src/Gargantext/Core/Text/List.hs
View file @
4838c6b8
...
...
@@ -15,13 +15,14 @@ module Gargantext.Core.Text.List
-- import Data.Either (partitionEithers, Either(..))
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
,
empty
)
import
Data.Text
(
Text
)
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
RootParent
(
..
),
mSetFromList
)
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
{-ngramsGroup,-}
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Core.Text.Metrics.TFICF
(
sortTficf
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
...
...
@@ -138,8 +139,8 @@ buildNgramsTermsList :: Lang
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
Down
<$>
getTficf
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
buildNgramsTermsList
_l
_n
_
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
Down
<$>
getTficf
uCid
mCid
NgramsTerms
let
candidatesSize
=
400
...
...
@@ -150,12 +151,12 @@ buildNgramsTermsList l n m s uCid mCid = do
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
termList
=
termList
=
-- (toTermList a b ((isStopTerm s) . fst) candidatesHead)
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesHead
)
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
ngs
=
List
.
concat
$
map
toNgramsElement
termList
ngs
=
List
.
concat
$
map
toNgramsElement
$
map
(
\
(
lt
,
(
t
,
d
))
->
(
lt
,
((
t
,
(
d
,
empty
)))))
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
src/Gargantext/Core/Text/Metrics/TFICF.hs
View file @
4838c6b8
...
...
@@ -25,7 +25,6 @@ module Gargantext.Core.Text.Metrics.TFICF ( TFICF
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Data.Set
(
Set
)
import
Gargantext.Core.Types
(
Ordering
(
..
))
import
Data.Map.Strict
(
Map
,
toList
)
import
qualified
Data.Ord
as
DO
(
Down
(
..
))
...
...
@@ -54,8 +53,8 @@ tficf _ _ = panic $ "[ERR]" <> path <> "Undefined for these contexts"
sortTficf
::
Ordering
->
(
Map
Text
(
Double
,
Set
Text
))
->
[
(
Text
,(
Double
,
Set
Text
)
)]
sortTficf
Down
=
List
.
sortOn
(
DO
.
Down
.
fst
.
snd
)
.
toList
sortTficf
Up
=
List
.
sortOn
(
fst
.
snd
)
.
toList
->
Map
Text
Double
->
[
(
Text
,
Double
)]
sortTficf
Down
=
List
.
sortOn
(
DO
.
Down
.
snd
)
.
toList
sortTficf
Up
=
List
.
sortOn
snd
.
toList
src/Gargantext/Core/Viz/Graph/Legend.hs
View file @
4838c6b8
...
...
@@ -24,7 +24,6 @@ import Gargantext.Core.Viz.Graph.Louvain (LouvainNodeId, CommunityId, comId2node
[LouvainNode] -> Map CommunityId LouvainNodeId
[(CommunityId, [LouvainNodeId])]
sort by length LouvainNodeIds
...
...
@@ -39,6 +38,8 @@ subgraph with [LouvainNodeId]
Map NodeId Label
-> map [LouvainNodeId] -> [(CommunityId, take 3 [Label])]
use specGen incExc score to order the labels
take 7 [(CommunityId, take 3 [Label])]
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
4838c6b8
...
...
@@ -16,7 +16,7 @@ Ngrams by node enable contextual metrics.
module
Gargantext.Database.Action.Metrics.NgramsByNode
where
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
,
fromList
)
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
)
import
Data.Map.Strict.Patch
(
PatchMap
,
Replace
,
diff
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
...
...
@@ -30,7 +30,6 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
)
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
...
...
@@ -56,52 +55,6 @@ ngramsGroup l _m _n = Text.intercalate " "
.
Text
.
replace
"-"
" "
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
u
m
nt
f
=
do
u'
<-
Map
.
filter
(
\
s
->
Set
.
size
s
>
1
)
<$>
getNodesByNgramsUser
u
nt
m'
<-
Map
.
filter
(
\
s
->
Set
.
size
s
>
1
)
<$>
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
{-
getTficfWith :: UserCorpusId
-> MasterCorpusId
-> [ListId]
-> NgramsType
-> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m
let f x = case Map.lookup x mtxt of
Nothing -> x
Just x' -> maybe x identity x'
pure $ toTficfData (countNodesByNgramsWith f u') (countNodesByNgramsWith f m')
-}
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
(
TficfSupra
(
Count
$
maybe
0
fst
$
Map
.
lookup
t
ms
)(
Total
ts
))
,
ns
)
)
|
(
t
,
(
n
,
ns
))
<-
toList
mi
]
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
0 → 100644
View file @
4838c6b8
{-|
Module : Gargantext.Database.Metrics.TFICF
Description : Ngrams by Node user and master
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Action.Metrics.TFICF
where
-- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..))
import
Data.Map.Strict
(
Map
,
toList
,
fromList
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
)
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectCountDocs
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
Double
)
getTficf
cId
mId
nt
=
do
mapTextDoubleLocal
<-
Map
.
filter
(
>
1
)
<$>
Map
.
map
(
fromIntegral
.
Set
.
size
)
<$>
getNodesByNgramsUser
cId
nt
mapTextDoubleGlobal
<-
Map
.
map
fromIntegral
<$>
getOccByNgramsOnlyFast
mId
nt
(
Map
.
keys
mapTextDoubleLocal
)
countLocal
<-
selectCountDocs
cId
countGlobal
<-
selectCountDocs
mId
pure
$
fromList
[
(
t
,
tficf
(
TficfInfra
(
Count
n
)
(
Total
$
fromIntegral
countLocal
))
(
TficfSupra
(
Count
$
fromMaybe
0
$
Map
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
|
(
t
,
n
)
<-
toList
mapTextDoubleLocal
]
src/Gargantext/Database/Query/Table/NodeNode.hs
View file @
4838c6b8
...
...
@@ -29,6 +29,7 @@ module Gargantext.Database.Query.Table.NodeNode
,
insertNodeNode
,
deleteNodeNode
,
selectPublicNodes
,
selectCountDocs
)
where
...
...
@@ -145,7 +146,20 @@ nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
|]
------------------------------------------------------------------------
-- | TODO use UTCTime fast
selectCountDocs
::
CorpusId
->
Cmd
err
Int
selectCountDocs
cId
=
runCountOpaQuery
(
queryCountDocs
cId
)
where
queryCountDocs
cId'
=
proc
()
->
do
(
n
,
nn
)
<-
joinInCorpus
-<
()
restrict
-<
nn
^.
nn_node1_id
.==
(
toNullable
$
pgNodeId
cId'
)
restrict
-<
nn
^.
nn_category
.>=
(
toNullable
$
pgInt4
1
)
restrict
-<
n
^.
node_typename
.==
(
pgInt4
$
nodeTypeId
NodeDocument
)
returnA
-<
n
-- | TODO use UTCTime fast
selectDocsDates
::
CorpusId
->
Cmd
err
[
Text
]
selectDocsDates
cId
=
map
(
head'
"selectDocsDates"
.
splitOn
"-"
)
<$>
catMaybes
...
...
src/Gargantext/Database/Query/Tree/Root.hs
View file @
4838c6b8
...
...
@@ -96,10 +96,6 @@ getOrMk_RootWithCorpus user cName c = do
pure
(
userId
,
rootId
,
corpusId
)
mkRoot
::
HasNodeError
err
=>
User
->
Cmd
err
[
RootId
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment