gargantext / haskell-gargantext · Commits

Commit 00f3dc5f
authored Dec 01, 2022 by Alexandre Delanoë

[FIX] Quick Fix of the ngrams building list

parent 22d644f2

Showing 2 changed files with 59 additions and 49 deletions (+59 / -49):

  gargantext.cabal                    +1  / -1
  src/Gargantext/Core/Text/List.hs    +58 / -48
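Most of the diff below does two things: it enables the BangPatterns extension and puts `!` strictness annotations on the intermediate `let` and `<-` bindings of buildNgramsTermsList, and it caps the number of ngrams fed into grouping (`List.take 1000` on ngramsKeys). As a reminder of the mechanism the patch relies on, here is a minimal, self-contained sketch (not project code; buildIndex and the sample data are made up for illustration) of what a bang pattern in a binding does: the bound value is forced to weak head normal form at the point of binding instead of being deferred as a thunk.

{-# LANGUAGE BangPatterns #-}
-- Minimal sketch, not project code: with BangPatterns, a bang pattern in a
-- let (or do) binding forces the bound value to weak head normal form when
-- the binding is evaluated, instead of accumulating a thunk. This is the
-- mechanism applied to the intermediate HashMaps in buildNgramsTermsList.

import qualified Data.Map.Strict as Map

-- Hypothetical example: build a word-count index strictly.
buildIndex :: [String] -> Map.Map String Int
buildIndex ws =
  let !counts = Map.fromListWith (+) [ (w, 1) | w <- ws ]  -- forced here
  in counts

main :: IO ()
main = do
  let !m = buildIndex (words "a b a c b a")  -- strict let binding inside do
  print (Map.toList m)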
gargantext.cabal

@@ -5,7 +5,7 @@ cabal-version: 1.12
 -- see: https://github.com/sol/hpack
 name:           gargantext
-version:        0.0.6.8.5.3
+version:        0.0.6.8.5.3
 synopsis:       Search, map, share
 description:    Please see README.md
 category:       Data
src/Gargantext/Core/Text/List.hs

@@ -11,6 +11,7 @@ Portability : POSIX
 {-# LANGUAGE ScopedTypeVariables #-}
 {-# LANGUAGE TemplateHaskell     #-}
+{-# LANGUAGE BangPatterns        #-}
 module Gargantext.Core.Text.List
   where
@@ -22,6 +23,7 @@ import Data.Map (Map)
 import Data.Monoid (mempty)
 import Data.Ord (Down(..))
 import Data.Set (Set)
+import Data.Text (Text)
 import Data.Tuple.Extra (both)
 import Gargantext.API.Ngrams.Types (NgramsElement, NgramsTerm(..))
 import Gargantext.Core.NodeStory
@@ -138,7 +140,7 @@ getGroupParams :: ( HasNodeError err
                   )
                => GroupParams -> HashSet Ngrams -> m GroupParams
 getGroupParams gp@(GroupWithPosTag l a _m) ng = do
-  hashMap <- HashMap.fromList <$> selectLems l a (HashSet.toList ng)
+  !hashMap <- HashMap.fromList <$> selectLems l a (HashSet.toList ng)
   -- printDebug "hashMap" hashMap
   pure $ over gwl_map (\x -> x <> hashMap) gp
 getGroupParams gp _ = pure gp
@@ -162,12 +164,14 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
   -- Filter 0 With Double
   -- Computing global speGen score
   printDebug "[buildNgramsTermsList: Sample List] / start" nt
-  allTerms :: HashMap NgramsTerm Double <- getTficf_withSample uCid mCid nt
+  !(allTerms :: HashMap NgramsTerm Double) <- getTficf_withSample uCid mCid nt
   printDebug "[buildNgramsTermsList: Sample List / end]" (nt, HashMap.size allTerms)
   printDebug "[buildNgramsTermsList: Flow Social List / start]" nt
   -- PrivateFirst for first developments since Public NodeMode is not implemented yet
-  socialLists :: FlowCont NgramsTerm FlowListScores
+  !(socialLists :: FlowCont NgramsTerm FlowListScores)
     <- flowSocialList mfslw user nt ( FlowCont HashMap.empty
                                     $ HashMap.fromList
                                     $ List.zip (HashMap.keys allTerms)
@@ -175,64 +179,70 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
                                     )
   printDebug "[buildNgramsTermsList: Flow Social List / end]" nt
-  let ngramsKeys = HashMap.keysSet allTerms
+  let !ngramsKeys = HashSet.fromList $ List.take 1000 $ HashSet.toList $ HashMap.keysSet allTerms
-  groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
+  printDebug "[buildNgramsTermsList: ngramsKeys]" (HashSet.size ngramsKeys)
+  !groupParams' <- getGroupParams groupParams (HashSet.map (text2ngrams . unNgramsTerm) ngramsKeys)
+  printDebug "[buildNgramsTermsList: groupParams']" ("" :: Text)
-  let socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
+  let !socialLists_Stemmed = addScoreStem groupParams' ngramsKeys socialLists
       --printDebug "socialLists_Stemmed" socialLists_Stemmed
-      groupedWithList = toGroupedTree socialLists_Stemmed allTerms
-      (stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
+      !groupedWithList = toGroupedTree socialLists_Stemmed allTerms
+      !(stopTerms, candidateTerms) = HashMap.partition ((== Just StopTerm) . viewListType)
                                   $ HashMap.filter (\g -> (view gts'_score g) > 1)
                                   $ view flc_scores groupedWithList
-      (groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
+      !(groupedMono, groupedMult) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) candidateTerms
   -- printDebug "stopTerms" stopTerms
+  printDebug "[buildNgramsTermsList] stopTerms" stopTerms
   -- splitting monterms and multiterms to take proportional candidates
   -- use % of list if to big, or Int if too small
-      listSizeGlobal = 2000 :: Double
-      monoSize = 0.4 :: Double
-      multSize = 1 - monoSize
+  let !listSizeGlobal = 2000 :: Double
+      !monoSize = 0.4 :: Double
+      !multSize = 1 - monoSize
      splitAt n' ns = both (HashMap.fromListWith (<>))
                    $ List.splitAt (round $ n' * listSizeGlobal)
                    $ List.sortOn (viewScore . snd)
                    $ HashMap.toList ns
-      (groupedMonoHead, _groupedMonoTail) = splitAt monoSize groupedMono
-      (groupedMultHead, groupedMultTail)  = splitAt multSize groupedMult
+      !(groupedMonoHead, _groupedMonoTail) = splitAt monoSize groupedMono
+      !(groupedMultHead, groupedMultTail)  = splitAt multSize groupedMult
      -------------------------
      -- Filter 1 With Set NodeId and SpeGen
-      selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
+      !selectedTerms = Set.toList $ hasTerms (groupedMonoHead <> groupedMultHead)
   printDebug "[buildNgramsTermsList: selectedTerms]" selectedTerms
   -- TODO remove (and remove HasNodeError instance)
-  userListId   <- defaultList uCid
-  masterListId <- defaultList mCid
+  !userListId   <- defaultList uCid
+  !masterListId <- defaultList mCid
 
-  mapTextDocIds <- getContextsByNgramsOnlyUser uCid
+  !mapTextDocIds <- getContextsByNgramsOnlyUser uCid
                                                [userListId, masterListId] nt selectedTerms
   -- printDebug "mapTextDocIds" mapTextDocIds
+  printDebug "[buildNgramsTermsList: mapTextDocIds]" mapTextDocIds
   let groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId))
-      groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax
+      !groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax
                                   $ setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead)
   --printDebug "groupedTreeScores_SetNodeId" groupedTreeScores_SetNodeId
+  printDebug "[buildNgramsTermsList: groupedTreeScores_SetNodeId]" groupedTreeScores_SetNodeId
   -- Coocurrences computation
   --, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
-  let mapCooc = HashMap.filter (>1) -- removing cooc of 1
+  let !mapCooc = HashMap.filter (>1) -- removing cooc of 1
              $ HashMap.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
                                 | (t1, s1) <- mapStemNodeIds
                                 , (t2, s2) <- mapStemNodeIds
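Besides the strictness annotations, the substantive change in the hunk above is that ngramsKeys is no longer the full key set of allTerms but at most 1000 of its keys, obtained through a HashSet.toList / List.take 1000 / HashSet.fromList round trip. A minimal sketch of that round trip follows, assuming the usual unordered-containers API (capAt is an illustrative name, not project code); since a HashSet has no defined iteration order, which elements survive the cut is unspecified, so the cap only bounds the amount of downstream work.

-- Minimal sketch, not project code: cap an unordered set at n elements,
-- the HashSet.toList / List.take / HashSet.fromList round trip applied to
-- ngramsKeys above. HashSet iteration order is unspecified, so which
-- elements are kept is arbitrary.
import           Data.Hashable (Hashable)
import qualified Data.HashSet  as HashSet
import qualified Data.List     as List

capAt :: (Eq a, Hashable a) => Int -> HashSet.HashSet a -> HashSet.HashSet a
capAt n = HashSet.fromList . List.take n . HashSet.toList

main :: IO ()
main = print (HashSet.size (capAt 3 (HashSet.fromList [1 .. 10 :: Int])))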
@@ -253,77 +263,77 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
   let groupedTreeScores_SpeGen :: HashMap NgramsTerm (GroupedTreeScores (Scored NgramsTerm))
-      groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity) groupedTreeScores_SetNodeId
+      !groupedTreeScores_SpeGen = setScoresWithMap (mapScores identity) groupedTreeScores_SetNodeId
   let
     -- sort / partition / split
     -- filter mono/multi again
-    (monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen
+    !(monoScored, multScored) = HashMap.partitionWithKey (\(NgramsTerm t) _v -> size t < 2) groupedTreeScores_SpeGen
     -- filter with max score
     partitionWithMaxScore = HashMap.partition (\g -> (view scored_genInc $ view gts'_score g) > (view scored_speExc $ view gts'_score g))
 
-    (monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored
-    (multScoredIncl, multScoredExcl) = partitionWithMaxScore multScored
+    !(monoScoredIncl, monoScoredExcl) = partitionWithMaxScore monoScored
+    !(multScoredIncl, multScoredExcl) = partitionWithMaxScore multScored
   -- splitAt
   let
     -- use % of list if to big, or Int if to small
-    mapSize = 1000 :: Double
-    canSize = mapSize * 2 :: Double
+    !mapSize = 1000 :: Double
+    !canSize = mapSize * 2 :: Double
 
-    inclSize = 0.4 :: Double
-    exclSize = 1 - inclSize
+    !inclSize = 0.4 :: Double
+    !exclSize = 1 - inclSize
 
    splitAt' max' n' = (both (HashMap.fromList)) . (List.splitAt (round $ n' * max'))
    sortOn f = (List.sortOn (Down . (view (gts'_score . f)) . snd)) . HashMap.toList
 
    monoInc_size n = splitAt' n $ monoSize * inclSize / 2
    multExc_size n = splitAt' n $ multSize * exclSize / 2
 
-    (mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ (sortOn scored_genInc) monoScoredIncl
-    (mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ (sortOn scored_speExc) monoScoredExcl
+    !(mapMonoScoredInclHead, monoScoredInclTail) = monoInc_size mapSize $ (sortOn scored_genInc) monoScoredIncl
+    !(mapMonoScoredExclHead, monoScoredExclTail) = monoInc_size mapSize $ (sortOn scored_speExc) monoScoredExcl
 
-    (mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ (sortOn scored_genInc) multScoredIncl
-    (mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ (sortOn scored_speExc) multScoredExcl
+    !(mapMultScoredInclHead, multScoredInclTail) = multExc_size mapSize $ (sortOn scored_genInc) multScoredIncl
+    !(mapMultScoredExclHead, multScoredExclTail) = multExc_size mapSize $ (sortOn scored_speExc) multScoredExcl
 
-    (canMonoScoredIncHead,  _) = monoInc_size canSize $ (sortOn scored_genInc) monoScoredInclTail
-    (canMonoScoredExclHead, _) = monoInc_size canSize $ (sortOn scored_speExc) monoScoredExclTail
+    !(canMonoScoredIncHead,  _) = monoInc_size canSize $ (sortOn scored_genInc) monoScoredInclTail
+    !(canMonoScoredExclHead, _) = monoInc_size canSize $ (sortOn scored_speExc) monoScoredExclTail
 
-    (canMulScoredInclHead,  _) = multExc_size canSize $ (sortOn scored_genInc) multScoredInclTail
-    (canMultScoredExclHead, _) = multExc_size canSize $ (sortOn scored_speExc) multScoredExclTail
+    !(canMulScoredInclHead,  _) = multExc_size canSize $ (sortOn scored_genInc) multScoredInclTail
+    !(canMultScoredExclHead, _) = multExc_size canSize $ (sortOn scored_speExc) multScoredExclTail
 
    ------------------------------------------------------------
    -- Final Step building the Typed list
    -- Candidates Terms need to be filtered
   let
-    maps = setListType (Just MapTerm)
+    !maps = setListType (Just MapTerm)
         $  mapMonoScoredInclHead <> mapMonoScoredExclHead <> mapMultScoredInclHead <> mapMultScoredExclHead
    -- An original way to filter to start with
-    cands = setListType (Just CandidateTerm)
+    !cands = setListType (Just CandidateTerm)
         $  canMonoScoredIncHead <> canMonoScoredExclHead <> canMulScoredInclHead <> canMultScoredExclHead
    -- TODO count it too
-    cands' = setListType (Just CandidateTerm)
+    !cands' = setListType (Just CandidateTerm)
          {-$ groupedMonoTail <>-} groupedMultTail
    -- Quick FIX
-    candNgramsElement = List.take 1000
+    !candNgramsElement = List.take 1000
                       $ toNgramsElement cands <> toNgramsElement cands'
-    result = Map.unionsWith (<>)
+    !result = Map.unionsWith (<>)
        [ Map.fromList [(nt, toNgramsElement maps <> toNgramsElement stopTerms <> candNgramsElement
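The selection logic in the last hunk is only made strict by this commit, not changed: scored terms are sorted in descending order and a proportional head is kept, round (n' * max') elements, with mapSize = 1000, canSize = 2000 and the 0.4 / 0.6 incl/excl split. A minimal sketch of that pattern on plain lists, with an illustrative name (topFraction) and only Prelude/Data.List functions:

-- Minimal sketch, not project code: "sort by score descending, keep a
-- proportional head", i.e. the List.sortOn (Down . score) plus
-- List.splitAt (round (fraction * maxSize)) pattern used above.
import Data.List (sortOn)
import Data.Ord  (Down (..))

topFraction :: Double -> Double -> [(String, Double)] -> ([(String, Double)], [(String, Double)])
topFraction maxSize fraction =
  splitAt (round (fraction * maxSize)) . sortOn (Down . snd)

main :: IO ()
main = print (topFraction 4 0.5 [("a", 1), ("b", 5), ("c", 3), ("d", 2)])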