Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
9e3e9e0f
Commit
9e3e9e0f
authored
Sep 22, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] MapList building, score needs normalization
parent
48eb263b
Pipeline
#1088
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
96 additions
and
102 deletions
+96
-102
Main.hs
bin/gargantext-init/Main.hs
+0
-2
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+3
-3
List.hs
src/Gargantext/Core/Text/List.hs
+91
-47
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+1
-49
Metrics.hs
src/Gargantext/Database/Action/Metrics.hs
+1
-1
No files found.
bin/gargantext-init/Main.hs
View file @
9e3e9e0f
...
...
@@ -17,7 +17,6 @@ module Main where
import
Data.Text
(
Text
)
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
(
Maybe
(
..
))
import
Gargantext.API.Admin.Settings
(
withDevEnv
,
runCmdDev
)
import
Gargantext.API.Prelude
(
GargError
)
import
Gargantext.API.Node
()
-- instances only
...
...
@@ -29,7 +28,6 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Trigger.Init
(
initFirstTriggers
,
initLastTriggers
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
RootId
,
ListId
)
import
Gargantext.Database.Prelude
(
Cmd
,
)
import
Gargantext.Prelude
import
System.Environment
(
getArgs
)
...
...
src/Gargantext/API/Ngrams.hs
View file @
9e3e9e0f
...
...
@@ -968,9 +968,9 @@ putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
->
Map
NgramsTerm
NgramsRepoElement
->
m
()
putListNgrams'
nodeId
ngramsType
ns
=
do
printDebug
"[putListNgrams'] nodeId"
nodeId
printDebug
"[putListNgrams'] ngramsType"
ngramsType
printDebug
"[putListNgrams'] ns"
ns
--
printDebug "[putListNgrams'] nodeId" nodeId
--
printDebug "[putListNgrams'] ngramsType" ngramsType
--
printDebug "[putListNgrams'] ns" ns
let
p1
=
NgramsTablePatch
.
PM
.
fromMap
$
NgramsReplace
Nothing
.
Just
<$>
ns
(
p0
,
p0_validity
)
=
PM
.
singleton
nodeId
p1
...
...
src/Gargantext/Core/Text/List.hs
View file @
9e3e9e0f
...
...
@@ -14,9 +14,9 @@ Portability : POSIX
module
Gargantext.Core.Text.List
where
-- import Data.Either (partitionEithers, Either(..)
)
import
Control.Lens
(
makeLenses
,
set
)
import
Data.
Maybe
(
fromMaybe
)
import
Control.Lens
(
makeLenses
)
import
Data.Maybe
(
fromMaybe
,
catMaybes
)
import
Data.
Ord
(
Down
(
..
)
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
...
...
@@ -29,9 +29,9 @@ import qualified Data.Text as Text
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
NgramsTerm
(
..
),
RootParent
(
..
),
mSetFromList
)
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
)
)
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Core.Text.Metrics
(
scored'
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
)
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
...
...
@@ -117,12 +117,14 @@ buildNgramsTermsList l n m s uCid mCid = do
-- Computing global speGen score
allTerms
<-
Map
.
toList
<$>
getTficf
uCid
mCid
NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms
let
(
stopTerms
,
candidateTerms
)
=
List
.
partition
((
isStopTerm
s
)
.
fst
)
allTerms
let
-- stopTerms ignored for now (need to be tagged already)
(
_stopTerms
,
candidateTerms
)
=
List
.
partition
((
isStopTerm
s
)
.
fst
)
allTerms
-- Grouping the ngrams and keeping the maximum score for label
let
grouped
=
groupStems'
...
...
@@ -134,22 +136,29 @@ buildNgramsTermsList l n m s uCid mCid = do
(
groupedMono
,
groupedMult
)
=
Map
.
partition
(
\
gt
->
_gt_size
gt
<
2
)
grouped
-- printDebug "groupedMult" groupedMult
-- splitting monterms and multiterms to take proportional candidates
let
listSizeGlobal
=
2000
::
Double
-- use % of list if to big, or Int if to small
monoSize
Global
=
0.6
::
Double
multSize
Global
=
1
-
monoSizeGlobal
monoSize
=
0.4
::
Double
multSize
=
1
-
monoSize
splitAt
n
ns
=
List
.
splitAt
(
round
$
n
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
splitAt
n'
ns
=
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSize
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSize
groupedMult
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSizeGlobal
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSizeGlobal
groupedMult
printDebug
"groupedMonoHead"
(
List
.
length
groupedMonoHead
)
printDebug
"groupedMonoTail"
(
List
.
length
groupedMonoHead
)
printDebug
"groupedMultHead"
(
List
.
length
groupedMultHead
)
printDebug
"groupedMultTail"
(
List
.
length
groupedMultTail
)
let
-- Get Local Scores now for selected grouped ngrams
selectedTerms
=
Set
.
toList
$
List
.
foldl'
(
\
set
(
GroupedText
_
l
_
g
_
_
_
)
->
Set
.
union
set
$
Set
.
union
g
$
Set
.
singleton
l
(
\
set
'
(
GroupedText
_
l'
_
g
_
_
_
)
->
Set
.
union
set'
$
Set
.
union
g
$
Set
.
singleton
l'
)
Set
.
empty
(
groupedMonoHead
<>
groupedMultHead
)
...
...
@@ -176,50 +185,84 @@ buildNgramsTermsList l n m s uCid mCid = do
$
Map
.
keys
mapTextDocIds
-- compute cooccurrences
mapCooc
=
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
mapCooc
=
Map
.
f
ilter
(
>
2
)
$
Map
.
f
romList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
,
t1
/=
t2
-- Null Diagonal
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
(
_gt_nodes
)
contextsAdded
-- printDebug "mapCooc" mapCooc
let
-- computing scores
scores
=
scored'
mapCooc
-- dilate scores
-- sort / filter
mapScores
f
=
Map
.
fromList
$
map
(
\
(
Scored
t
g
s'
)
->
(
t
,
f
(
g
,
s'
)))
$
scored'
mapCooc
groupsWithScores
=
catMaybes
$
map
(
\
(
stem
,
g
)
->
case
Map
.
lookup
stem
mapScores'
of
Nothing
->
Nothing
Just
s'
->
Just
$
g
{
_gt_score
=
s'
}
)
$
Map
.
toList
contextsAdded
where
mapScores'
=
mapScores
adapt1
-- identity
adapt1
(
s1
,
s2
)
=
(
log'
5
s1
,
log'
2
s2
)
log'
n'
x
=
1
+
(
if
x
<=
0
then
0
else
log
$
(
10
^
(
n'
::
Int
))
*
x
)
-- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores
let
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidateTerms
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSizeGlobal
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSizeGlobal
)
multi
-- Computing local speGen score
listSizeLocal
=
350
::
Double
-- Final Step building the Typed list
termList
=
(
map
(
toGargList
$
Just
StopTerm
)
stopTerms
)
<>
(
map
(
toGargList
$
Just
MapTerm
)
(
monoHead
<>
multiHead
))
<>
(
map
(
toGargList
$
Just
CandidateTerm
)
(
monoTail
<>
multiTail
))
-- sort / partition / split
-- filter mono/multi again
(
monoScored
,
multScored
)
=
List
.
partition
(
\
g
->
_gt_size
g
<
2
)
groupsWithScores
-- filter with max score
partitionWithMaxScore
=
List
.
partition
(
\
g
->
let
(
s1
,
s2
)
=
_gt_score
g
in
s1
>
s2
)
(
monoScoredIncl
,
monoScoredExcl
)
=
partitionWithMaxScore
monoScored
(
multScoredIncl
,
multScoredExcl
)
=
partitionWithMaxScore
multScored
-- splitAt
let
listSizeLocal
=
1000
::
Double
-- use % of list if to big, or Int if to small
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
List
.
splitAt
(
round
$
n'
*
listSizeLocal
)
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
monoScoredExcl
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
multScoredExcl
ngs
=
List
.
concat
$
map
toNgramsElement
$
groupStems
$
map
(
\
(
listType
,
(
t
,
d
))
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
listType
t
d
Set
.
empty
(
size
t
)
stem
Set
.
empty
)
)
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
-- Final Step building the Typed list
-- (map (toGargList $ Just StopTerm) stopTerms) -- Removing stops (needs social score)
termListHead
=
(
map
(
\
g
->
g
{
_gt_listType
=
Just
MapTerm
}
)
(
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
)
)
<>
(
map
(
\
g
->
g
{
_gt_listType
=
Just
CandidateTerm
})
(
monoScoredInclTail
<>
monoScoredExclTail
<>
multScoredInclTail
<>
multScoredExclTail
)
)
termListTail
=
map
(
\
g
->
g
{
_gt_listType
=
Just
CandidateTerm
})
(
groupedMonoTail
<>
groupedMultTail
)
-- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail
--
printDebug
"multScoredInclHead"
multScoredInclHead
printDebug
"multScoredExclTail"
multScoredExclTail
pure
$
Map
.
fromList
[(
NgramsTerms
,
(
List
.
concat
$
map
toNgramsElement
$
termListHead
)
<>
(
List
.
concat
$
map
toNgramsElement
$
termListTail
)
)
]
groupStems
::
[(
Stem
,
GroupedText
Double
)]
->
[
GroupedText
Double
]
groupStems
=
Map
.
elems
.
groupStems'
...
...
@@ -239,7 +282,7 @@ groupStems' = Map.fromListWith grouping
toNgramsElement
::
GroupedText
Double
->
[
NgramsElement
]
toNgramsElement
::
GroupedText
a
->
[
NgramsElement
]
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
_
_
_
)
=
[
parentElem
]
<>
childrenElems
where
...
...
@@ -278,7 +321,8 @@ data GroupedText score =
,
_gt_stem
::
!
Stem
,
_gt_nodes
::
!
(
Set
NodeId
)
}
instance
Show
score
=>
Show
(
GroupedText
score
)
where
show
(
GroupedText
_
l
s
_
_
_
_
)
=
show
l
<>
":"
<>
show
s
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
(
==
)
(
GroupedText
_
_
score1
_
_
_
_
)
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
9e3e9e0f
...
...
@@ -18,18 +18,13 @@ module Gargantext.Core.Text.Metrics
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
--import GHC.Float (exp)
import
Data.Tuple.Extra
(
both
)
import
Data.Map
(
Map
)
import
Data.List.Extra
(
sortOn
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.Graph.Distances.Matrice
import
Gargantext.Core.Viz.Graph.Index
import
Gargantext.Core.Statistics
(
pcaReduceTo
,
Dimension
(
..
))
import
qualified
Data.Array.Accelerate
as
DAA
import
qualified
Data.Array.Accelerate.Interpreter
as
DAA
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
Vec
...
...
@@ -37,14 +32,6 @@ import qualified Data.Vector.Storable as Vec
type
MapListSize
=
Int
type
InclusionSize
=
Int
{-
toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored' = map2scored
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
-}
scored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
scored
=
map2scored
.
(
pcaReduceTo
(
Dimension
2
))
.
scored2map
where
...
...
@@ -61,6 +48,7 @@ data Scored ts = Scored
,
_scored_speExc
::
!
SpecificityExclusion
}
deriving
(
Show
)
localMetrics'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
t
(
Vec
.
Vector
Double
)
localMetrics'
m
=
Map
.
fromList
$
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
(
t
,
Vec
.
fromList
[
inc
,
spe
]))
(
Map
.
toList
fi
)
...
...
@@ -85,39 +73,3 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) score
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
takeScored
::
Ord
t
=>
MapListSize
->
InclusionSize
->
Map
(
t
,
t
)
Int
->
([
t
],[
t
])
takeScored
listSize
incSize
=
both
(
map
_scored_terms
)
.
takeLinear
listSize
incSize
_scored_genInc
_scored_speExc
.
scored
-- | Filter Scored data
-- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
-- [(3,8),(6,5)]
takeLinear
::
(
Ord
b1
,
Ord
b2
)
=>
MapListSize
->
InclusionSize
->
(
a
->
b2
)
->
(
a
->
b1
)
->
[
a
]
->
([
a
],[
a
])
takeLinear
mls
incSize
speGen
incExc
=
(
List
.
splitAt
mls
)
.
List
.
concat
.
map
(
take
$
round
$
(
fromIntegral
mls
::
Double
)
/
(
fromIntegral
incSize
::
Double
)
)
.
map
(
sortOn
speGen
)
.
splitEvery
incSize
.
take
5000
.
takePercent
(
0.70
)
.
sortOn
incExc
takePercent
::
Double
->
[
a
]
->
[
a
]
takePercent
l
xs
=
List
.
take
l'
xs
where
l'
=
round
$
l
*
(
fromIntegral
$
List
.
length
xs
)
splitTake
::
(
Int
,
a
->
Bool
)
->
(
Int
,
a
->
Bool
)
->
[
a
]
->
([
a
],
[
a
])
splitTake
(
a
,
af
)
(
b
,
bf
)
xs
=
(
mpa
<>
mpb
,
ca
<>
cb
)
where
(
mpa
,
ca
)
=
List
.
splitAt
a
$
List
.
filter
af
xs
(
mpb
,
cb
)
=
List
.
splitAt
b
$
List
.
filter
bf
xs
src/Gargantext/Database/Action/Metrics.hs
View file @
9e3e9e0f
...
...
@@ -46,7 +46,7 @@ getNgramsCooc :: (FlowCmdM env err m)
)
getNgramsCooc
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
let
take'
Nothing
xs
=
xs
take'
(
Just
n
)
xs
=
take
n
xs
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment