Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
48eb263b
Commit
48eb263b
authored
Sep 22, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] Local SpeGen computed, needs Metrics/Scored refactoring
parent
7fd045e8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
78 additions
and
33 deletions
+78
-33
List.hs
src/Gargantext/Core/Text/List.hs
+78
-33
No files found.
src/Gargantext/Core/Text/List.hs
View file @
48eb263b
...
...
@@ -9,11 +9,13 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Text.List
where
-- import Data.Either (partitionEithers, Either(..))
import
Control.Lens
(
makeLenses
,
set
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
...
...
@@ -28,6 +30,8 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Core.Text.Metrics
(
scored'
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
...
...
@@ -124,7 +128,7 @@ buildNgramsTermsList l n m s uCid mCid = do
let
grouped
=
groupStems'
$
map
(
\
(
t
,
d
)
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
Nothing
t
d
Set
.
empty
(
size
t
)
stem
,
GroupedText
Nothing
t
d
Set
.
empty
(
size
t
)
stem
Set
.
empty
)
)
candidateTerms
...
...
@@ -143,9 +147,9 @@ buildNgramsTermsList l n m s uCid mCid = do
-- Get Local Scores now for selected grouped ngrams
selectedTerms
=
Set
.
toList
$
List
.
foldl'
(
\
set
(
GroupedText
_
l
_
g
_
_
)
->
Set
.
union
set
$
Set
.
union
g
$
Set
.
singleton
l
(
\
set
(
GroupedText
_
l
_
g
_
_
_
)
->
Set
.
union
set
$
Set
.
union
g
$
Set
.
singleton
l
)
Set
.
empty
(
groupedMonoHead
<>
groupedMultHead
)
...
...
@@ -155,9 +159,36 @@ buildNgramsTermsList l n m s uCid mCid = do
masterListId
<-
defaultList
mCid
mapTextDocIds
<-
getNodesByNgramsOnlyUser
uCid
[
userListId
,
masterListId
]
NgramsTerms
selectedTerms
-- groups Set NodeId
let
mapGroups
=
Map
.
fromList
$
map
(
\
g
->
(
_gt_stem
g
,
g
))
$
groupedMonoHead
<>
groupedMultHead
-- grouping with Set NodeId
contextsAdded
=
foldl'
(
\
mapGroups'
k
->
let
k'
=
ngramsGroup
l
n
m
k
in
case
Map
.
lookup
k'
mapGroups'
of
Nothing
->
mapGroups'
Just
g
->
case
Map
.
lookup
k
mapTextDocIds
of
Nothing
->
mapGroups'
Just
ns
->
Map
.
insert
k'
(
g
{
_gt_nodes
=
Set
.
union
ns
(
_gt_nodes
g
)})
mapGroups'
)
mapGroups
$
Map
.
keys
mapTextDocIds
-- compute cooccurrences
-- compute scores
mapCooc
=
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
,
t1
/=
t2
-- Null Diagonal
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
(
_gt_nodes
)
contextsAdded
-- computing scores
scores
=
scored'
mapCooc
-- dilate scores
-- sort / filter
...
...
@@ -175,56 +206,41 @@ buildNgramsTermsList l n m s uCid mCid = do
<>
(
map
(
toGargList
$
Just
MapTerm
)
(
monoHead
<>
multiHead
))
<>
(
map
(
toGargList
$
Just
CandidateTerm
)
(
monoTail
<>
multiTail
))
ngs
=
List
.
concat
$
map
toNgramsElement
$
groupStems
$
map
(
\
(
listType
,
(
t
,
d
))
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
listType
t
d
Set
.
empty
(
size
t
)
stem
,
GroupedText
listType
t
d
Set
.
empty
(
size
t
)
stem
Set
.
empty
)
)
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
type
Group
=
Lang
->
Int
->
Int
->
Text
->
Text
type
Stem
=
Text
type
Label
=
Text
data
GroupedText
score
=
GroupedText
{
_gt_listType
::
!
(
Maybe
ListType
)
,
_gt_label
::
!
Label
,
_gt_score
::
!
score
,
_gt_group
::
!
(
Set
Text
)
,
_gt_size
::
!
Int
,
_gt_stem
::
!
Stem
}
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
(
==
)
(
GroupedText
_
_
score1
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
)
=
(
==
)
score1
score2
instance
(
Eq
a
,
Ord
a
)
=>
Ord
(
GroupedText
a
)
where
compare
(
GroupedText
_
_
score1
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
)
=
compare
score1
score2
groupStems
::
[(
Stem
,
GroupedText
Double
)]
->
[
GroupedText
Double
]
groupStems
=
Map
.
elems
.
groupStems'
groupStems'
::
[(
Stem
,
GroupedText
Double
)]
->
Map
Stem
(
GroupedText
Double
)
groupStems'
=
Map
.
fromListWith
grouping
where
grouping
(
GroupedText
lt1
label1
score1
group1
s1
stem1
)
(
GroupedText
lt2
label2
score2
group2
s2
stem2
)
|
score1
>=
score2
=
GroupedText
lt
label1
score1
(
Set
.
insert
label2
gr
)
s1
stem1
|
otherwise
=
GroupedText
lt
label2
score2
(
Set
.
insert
label1
gr
)
s2
stem2
grouping
(
GroupedText
lt1
label1
score1
group1
s1
stem1
nodes1
)
(
GroupedText
lt2
label2
score2
group2
s2
stem2
nodes2
)
|
score1
>=
score2
=
GroupedText
lt
label1
score1
(
Set
.
insert
label2
gr
)
s1
stem1
nodes
|
otherwise
=
GroupedText
lt
label2
score2
(
Set
.
insert
label1
gr
)
s2
stem2
nodes
where
lt
=
lt1
<>
lt2
gr
=
Set
.
union
group1
group2
gr
=
Set
.
union
group1
group2
nodes
=
Set
.
union
nodes1
nodes2
toNgramsElement
::
GroupedText
Double
->
[
NgramsElement
]
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
_
_
)
=
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
_
_
_
)
=
[
parentElem
]
<>
childrenElems
where
parent
=
label
...
...
@@ -247,3 +263,32 @@ isStopTerm :: StopSize -> Text -> Bool
isStopTerm
(
StopSize
n
)
x
=
Text
.
length
x
<
n
||
any
isStopChar
(
Text
.
unpack
x
)
where
isStopChar
c
=
not
(
c
`
elem
`
(
"- /()%"
::
[
Char
])
||
Char
.
isAlpha
c
)
------------------------------------------------------------------------------
type
Group
=
Lang
->
Int
->
Int
->
Text
->
Text
type
Stem
=
Text
type
Label
=
Text
data
GroupedText
score
=
GroupedText
{
_gt_listType
::
!
(
Maybe
ListType
)
,
_gt_label
::
!
Label
,
_gt_score
::
!
score
,
_gt_group
::
!
(
Set
Text
)
,
_gt_size
::
!
Int
,
_gt_stem
::
!
Stem
,
_gt_nodes
::
!
(
Set
NodeId
)
}
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
(
==
)
(
GroupedText
_
_
score1
_
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
_
)
=
(
==
)
score1
score2
instance
(
Eq
a
,
Ord
a
)
=>
Ord
(
GroupedText
a
)
where
compare
(
GroupedText
_
_
score1
_
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
_
)
=
compare
score1
score2
-- Lenses Instances
makeLenses
'G
r
oupedText
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment