Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
154
Issues
154
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d4e1dd93
Commit
d4e1dd93
authored
May 03, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] map list size filtered (others terms do not appear any more)
parent
17fbec42
Pipeline
#1458
failed with stage
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
33 deletions
+22
-33
List.hs
src/Gargantext/API/Ngrams/List.hs
+4
-4
List.hs
src/Gargantext/Core/Text/List.hs
+18
-29
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
d4e1dd93
...
...
@@ -130,13 +130,13 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
HashMap
.
fromList
$
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
))
$
List
.
concat
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
([
unNgramsTerm
k
],
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
...
...
@@ -150,7 +150,7 @@ reIndexWith cId lId nt lts = do
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
saveDocNgramsWith
lId
ngramsByDoc
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
pure
()
-- ngramsByDoc
...
...
src/Gargantext/Core/Text/List.hs
View file @
d4e1dd93
...
...
@@ -104,20 +104,9 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
{-
if nt == Sources -- Authors
then printDebug "flowSocialList" socialLists
else printDebug "flowSocialList" ""
-}
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
{-
if nt == Sources -- Authors
then printDebug "groupedWithList" groupedWithList
else printDebug "groupedWithList" ""
-}
let
(
stopTerms
,
tailTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
...
...
@@ -129,10 +118,10 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
$
List
.
sortOn
(
Down
.
viewScore
.
snd
)
$
HashMap
.
toList
tailTerms'
pure
$
Map
.
fromList
[(
nt
,
(
toNgramsElement
stopTerms
)
<>
(
toNgramsElement
mapTerms
)
<>
(
toNgramsElement
$
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
(
toNgramsElement
$
setListType
(
Just
CandidateTerm
)
candiTerms
)
pure
$
Map
.
fromList
[(
nt
,
(
toNgramsElement
stopTerms
)
<>
(
toNgramsElement
mapTerms
)
<>
(
toNgramsElement
$
setListType
(
Just
MapTerm
)
mapTerms'
)
<>
(
toNgramsElement
$
setListType
(
Just
CandidateTerm
)
candiTerms
)
)]
...
...
@@ -149,7 +138,6 @@ getGroupParams gp@(GroupWithPosTag l a _m) ng = do
getGroupParams
gp
_
=
pure
gp
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
...
...
@@ -178,18 +166,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
groupParams'
<-
getGroupParams
groupParams
(
HashSet
.
map
(
text2ngrams
.
unNgramsTerm
)
ngramsKeys
)
let
socialLists_Stemmed
=
addScoreStem
groupParams'
ngramsKeys
socialLists
let
socialLists_Stemmed
=
addScoreStem
groupParams'
ngramsKeys
socialLists
--printDebug "socialLists_Stemmed" socialLists_Stemmed
let
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
(
groupedMono
,
groupedMult
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
candidateTerms
(
groupedMono
,
groupedMult
)
=
HashMap
.
partitionWithKey
(
\
(
NgramsTerm
t
)
_v
->
size
t
<
2
)
candidateTerms
-- printDebug "stopTerms" stopTerms
-- splitting monoterms and multiterms to take proportional candidates
let
-- use % of list if too big, or Int if too small
listSizeGlobal
=
2000
::
Double
monoSize
=
0.4
::
Double
...
...
@@ -208,11 +197,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
selectedTerms
=
Set
.
toList
$
hasTerms
(
groupedMonoHead
<>
groupedMultHead
)
-- TO remove (and remove HasNodeError instance)
-- TODO remove (and remove HasNodeError instance)
userListId
<-
defaultList
uCid
masterListId
<-
defaultList
mCid
mapTextDocIds
<-
getNodesByNgramsOnlyUser
uCid
[
userListId
,
masterListId
]
nt
...
...
@@ -220,11 +208,15 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
groupedTreeScores_SetNodeId
::
HashMap
NgramsTerm
(
GroupedTreeScores
(
Set
NodeId
))
groupedTreeScores_SetNodeId
=
setScoresWithMap
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
groupedTreeScores_SetNodeId
=
HashMap
.
filter
(
\
g
->
Set
.
size
(
view
gts'_score
g
)
>
1
)
-- removing hapax
$
setScoresWithMap
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
printDebug
"groupedTreeScores_SetNodeId"
groupedTreeScores_SetNodeId
-- | Cooccurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let
mapCooc
=
HashMap
.
filter
(
>
2
)
let
mapCooc
=
HashMap
.
filter
(
>
1
)
-- removing cooc of 1
$
HashMap
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
...
...
@@ -245,10 +237,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
groupedTreeScores_SpeGen
::
HashMap
NgramsTerm
(
GroupedTreeScores
(
Scored
NgramsTerm
))
groupedTreeScores_SpeGen
=
setScoresWithMap
(
mapScores
identity
)
(
groupedMonoHead
<>
groupedMultHead
)
groupedTreeScores_SpeGen
=
setScoresWithMap
(
mapScores
identity
)
groupedTreeScores_SetNodeId
let
-- sort / partition / split
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment