Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d4e1dd93
Commit
d4e1dd93
authored
May 03, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] map list size filtered (others terms do not appear any more)
parent
17fbec42
Pipeline
#1458
failed with stage
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
33 deletions
+22
-33
List.hs
src/Gargantext/API/Ngrams/List.hs
+4
-4
List.hs
src/Gargantext/Core/Text/List.hs
+18
-29
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
d4e1dd93
...
...
@@ -130,13 +130,13 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
HashMap
.
fromList
$
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
))
$
List
.
concat
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
...
...
@@ -150,7 +150,7 @@ reIndexWith cId lId nt lts = do
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
saveDocNgramsWith
lId
ngramsByDoc
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
pure
()
-- ngramsByDoc
...
...
src/Gargantext/Core/Text/List.hs
View file @
d4e1dd93
...
...
@@ -104,20 +104,9 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
{-
if nt == Sources -- Authors
then printDebug "flowSocialList" socialLists
else printDebug "flowSocialList" ""
-}
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
{-
if nt == Sources -- Authors
then printDebug "groupedWithList" groupedWithList
else printDebug "groupedWithList" ""
-}
let
(
stopTerms
,
tailTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
...
...
@@ -129,10 +118,10 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$
List
.
sortOn
(
Down
.
viewScore
.
snd
)
$
HashMap
.
toList
tailTerms'
pure
$
Map
.
fromList
[(
nt
,
(
toNgramsElement
stopTerms
)
<>
(
toNgramsElement
mapTerms
)
<>
(
toNgramsElement
$
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
(
toNgramsElement
$
setListType
(
Just
CandidateTerm
)
candiTerms
)
pure
$
Map
.
fromList
[(
nt
,
(
toNgramsElement
stopTerms
)
<>
(
toNgramsElement
mapTerms
)
<>
(
toNgramsElement
$
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
(
toNgramsElement
$
setListType
(
Just
CandidateTerm
)
candiTerms
)
)]
...
...
@@ -149,7 +138,6 @@ getGroupParams gp@(GroupWithPosTag l a _m) ng = do
getGroupParams
gp
_
=
pure
gp
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
...
...
@@ -178,18 +166,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
groupParams'
<-
getGroupParams
groupParams
(
HashSet
.
map
(
text2ngrams
.
unNgramsTerm
)
ngramsKeys
)
let
socialLists_Stemmed
=
addScoreStem
groupParams'
ngramsKeys
socialLists
let
socialLists_Stemmed
=
addScoreStem
groupParams'
ngramsKeys
socialLists
--printDebug "socialLists_Stemmed" socialLists_Stemmed
let
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
(
groupedMono
,
groupedMult
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
candidateTerms
(
groupedMono
,
groupedMult
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
candidateTerms
-- printDebug "stopTerms" stopTerms
-- splitting monterms and multiterms to take proportional candidates
let
-- use % of list if to big, or Int if too small
listSizeGlobal
=
2000
::
Double
monoSize
=
0.4
::
Double
...
...
@@ -208,11 +197,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
selectedTerms
=
Set
.
toList
$
hasTerms
(
groupedMonoHead
<>
groupedMultHead
)
-- TO remove (and remove HasNodeError instance)
-- TO
DO
remove (and remove HasNodeError instance)
userListId
<-
defaultList
uCid
masterListId
<-
defaultList
mCid
mapTextDocIds
<-
getNodesByNgramsOnlyUser
uCid
[
userListId
,
masterListId
]
nt
...
...
@@ -220,11 +208,15 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
groupedTreeScores_SetNodeId
::
HashMap
NgramsTerm
(
GroupedTreeScores
(
Set
NodeId
))
groupedTreeScores_SetNodeId
=
setScoresWithMap
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
groupedTreeScores_SetNodeId
=
HashMap
.
filter
(
\
g
->
Set
.
size
(
view
gts'_score
g
)
>
1
)
-- removing hapax
$
setScoresWithMap
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
printDebug
"groupedTreeScores_SetNodeId"
groupedTreeScores_SetNodeId
-- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let
mapCooc
=
HashMap
.
filter
(
>
2
)
let
mapCooc
=
HashMap
.
filter
(
>
1
)
-- removing cooc of 1
$
HashMap
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
...
...
@@ -245,10 +237,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
groupedTreeScores_SpeGen
::
HashMap
NgramsTerm
(
GroupedTreeScores
(
Scored
NgramsTerm
))
groupedTreeScores_SpeGen
=
setScoresWithMap
(
mapScores
identity
)
(
groupedMonoHead
<>
groupedMultHead
)
groupedTreeScores_SpeGen
=
setScoresWithMap
(
mapScores
identity
)
groupedTreeScores_SetNodeId
let
-- sort / partition / split
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment