Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
197
Issues
197
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4463a799
Commit
4463a799
authored
Sep 21, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] MapList Global score that needs local score (WIP)
parent
3bb9fb2f
Pipeline
#1085
failed with stage
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
115 additions
and
33 deletions
+115
-33
List.hs
src/Gargantext/Core/Text/List.hs
+88
-33
IncExc.hs
src/Gargantext/Core/Text/Metrics/SpeGen/IncExc.hs
+27
-0
No files found.
src/Gargantext/Core/Text/List.hs
View file @
4463a799
...
...
@@ -14,6 +14,7 @@ module Gargantext.Core.Text.List
where
-- import Data.Either (partitionEithers, Either(..))
import
Data.Maybe
(
fromMaybe
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
...
...
@@ -105,27 +106,65 @@ buildNgramsTermsList :: Lang
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
Up
<$>
getTficf
uCid
mCid
NgramsTerms
-- printDebug "head candidates" (List.take 10 $ candidates)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
-- Computing global speGen score
-- TODO sort is not needed, just take the score
allTerms
<-
sortTficf
Up
<$>
getTficf
uCid
mCid
NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms
let
(
stopTerms
,
candidateTerms
)
=
List
.
partition
((
isStopTerm
s
)
.
fst
)
allTerms
-- Grouping the ngrams and keeping the maximum score for label
let
grouped
=
groupStems'
$
map
(
\
(
t
,
d
)
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
Nothing
t
d
Set
.
empty
(
size
t
)
stem
)
)
candidateTerms
(
groupedMono
,
groupedMult
)
=
Map
.
partition
(
\
gt
->
_gt_size
gt
<
2
)
grouped
-- splitting monterms and multiterms to take proportional candidates
let
listSize
=
400
::
Double
(
candidatesHead
,
candidatesTail0
)
=
List
.
splitAt
3
candidates
listSizeGlobal
=
2000
::
Double
-- use % of list if to big, or Int if to small
monoSizeGlobal
=
0.6
::
Double
multSizeGlobal
=
1
-
monoSizeGlobal
splitAt
n
ns
=
List
.
splitAt
(
round
$
n
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSizeGlobal
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSizeGlobal
groupedMult
-- Get Local Scores now for selected grouped ngrams
selectedTerms
=
Set
.
toList
$
List
.
foldl'
(
\
set
(
GroupedText
_
l
_
g
_
_
)
->
Set
.
union
set
$
Set
.
union
g
$
Set
.
singleton
l
)
Set
.
empty
(
groupedMonoHead
<>
groupedMultHead
)
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidatesTail0
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSize
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSize
)
multi
termList
=
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
(
monoHead
<>
multiHead
))
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
(
monoTail
<>
multiTail
))
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidateTerms
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSizeGlobal
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSizeGlobal
)
multi
-- Computing local speGen score
listSizeLocal
=
350
::
Double
-- Final Step building the Typed list
termList
=
(
map
(
toGargList
$
Just
StopTerm
)
stopTerms
)
<>
(
map
(
toGargList
$
Just
MapTerm
)
(
monoHead
<>
multiHead
))
<>
(
map
(
toGargList
$
Just
CandidateTerm
)
(
monoTail
<>
multiTail
))
ngs
=
List
.
concat
$
map
toNgramsElement
$
groupStems
$
map
(
\
(
listType
,
(
t
,
d
))
->
(
ngramsGroup
l
n
m
t
,
GroupedText
listType
t
d
Set
.
empty
$
map
(
\
(
listType
,
(
t
,
d
))
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
listType
t
d
Set
.
empty
(
size
t
)
stem
)
)
termList
...
...
@@ -134,42 +173,58 @@ buildNgramsTermsList l n m s uCid mCid = do
type
Group
=
Lang
->
Int
->
Int
->
Text
->
Text
type
Stem
=
Text
type
Label
=
Text
data
GroupedText
=
GroupedText
{
_gt_listType
::
ListType
data
GroupedText
score
=
GroupedText
{
_gt_listType
::
Maybe
ListType
,
_gt_label
::
Label
,
_gt_score
::
Doubl
e
,
_gt_score
::
scor
e
,
_gt_group
::
Set
Text
,
_gt_size
::
Int
,
_gt_stem
::
Stem
}
groupStems
::
[(
Stem
,
GroupedText
)]
->
[
GroupedText
]
groupStems
=
Map
.
elems
.
Map
.
fromListWith
grouping
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
(
==
)
(
GroupedText
_
_
score1
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
)
=
(
==
)
score1
score2
instance
(
Eq
a
,
Ord
a
)
=>
Ord
(
GroupedText
a
)
where
compare
(
GroupedText
_
_
score1
_
_
_
)
(
GroupedText
_
_
score2
_
_
_
)
=
compare
score1
score2
groupStems
::
[(
Stem
,
GroupedText
Double
)]
->
[
GroupedText
Double
]
groupStems
=
Map
.
elems
.
groupStems'
groupStems'
::
[(
Stem
,
GroupedText
Double
)]
->
Map
Stem
(
GroupedText
Double
)
groupStems'
=
Map
.
fromListWith
grouping
where
grouping
(
GroupedText
lt1
label1
score1
group1
)
(
GroupedText
lt2
label2
score2
group2
)
|
score1
>=
score2
=
GroupedText
lt
label1
score1
(
Set
.
insert
label2
gr
)
|
otherwise
=
GroupedText
lt
label2
score2
(
Set
.
insert
label1
gr
)
grouping
(
GroupedText
lt1
label1
score1
group1
s1
stem1
)
(
GroupedText
lt2
label2
score2
group2
s2
stem2
)
|
score1
>=
score2
=
GroupedText
lt
label1
score1
(
Set
.
insert
label2
gr
)
s1
stem1
|
otherwise
=
GroupedText
lt
label2
score2
(
Set
.
insert
label1
gr
)
s2
stem2
where
lt
=
lt1
<>
lt2
gr
=
Set
.
union
group1
group2
toNgramsElement
::
GroupedText
->
[
NgramsElement
]
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
)
=
toNgramsElement
::
GroupedText
Double
->
[
NgramsElement
]
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
_
_
)
=
[
parentElem
]
<>
childrenElems
where
parent
=
label
children
=
Set
.
toList
setNgrams
parentElem
=
mkNgramsElement
(
NgramsTerm
parent
)
listType
(
fromMaybe
CandidateTerm
listType
)
Nothing
(
mSetFromList
(
NgramsTerm
<$>
children
))
childrenElems
=
map
(
\
t
->
mkNgramsElement
t
listType
childrenElems
=
map
(
\
t
->
mkNgramsElement
t
(
fromMaybe
CandidateTerm
$
listType
)
(
Just
$
RootParent
(
NgramsTerm
parent
)
(
NgramsTerm
parent
))
(
mSetFromList
[]
)
)
(
NgramsTerm
<$>
children
)
toGargList
::
(
b
->
Bool
)
->
ListType
->
b
->
(
ListType
,
b
)
toGargList
isStop
l
n
=
case
isStop
n
of
True
->
(
StopTerm
,
n
)
False
->
(
l
,
n
)
toGargList
::
Maybe
ListType
->
b
->
(
Maybe
ListType
,
b
)
toGargList
l
n
=
(
l
,
n
)
isStopTerm
::
StopSize
->
Text
->
Bool
...
...
src/Gargantext/Core/Text/Metrics/SpeGen/IncExc.hs
0 → 100644
View file @
4463a799
{-|
Module : Gargantext.Core.Text.Metrics.SpeGen.IncExc
Description :
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Gargantext.Core.Text.Metrics.SpeGen.IncExc
where
{-
data IncExc = Inclusion { unInclusion :: !Double }
| Exclusion { unExclusion :: !Double }
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment