Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
9e3e9e0f
Commit
9e3e9e0f
authored
Sep 22, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] MapList building, score needs normalization
parent
48eb263b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
95 additions
and
101 deletions
+95
-101
Main.hs
bin/gargantext-init/Main.hs
+0
-2
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+3
-3
List.hs
src/Gargantext/Core/Text/List.hs
+91
-47
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+1
-49
No files found.
bin/gargantext-init/Main.hs
View file @
9e3e9e0f
...
...
@@ -17,7 +17,6 @@ module Main where
import
Data.Text
(
Text
)
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
(
Maybe
(
..
))
import
Gargantext.API.Admin.Settings
(
withDevEnv
,
runCmdDev
)
import
Gargantext.API.Prelude
(
GargError
)
import
Gargantext.API.Node
()
-- instances only
...
...
@@ -29,7 +28,6 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Trigger.Init
(
initFirstTriggers
,
initLastTriggers
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
RootId
,
ListId
)
import
Gargantext.Database.Prelude
(
Cmd
,
)
import
Gargantext.Prelude
import
System.Environment
(
getArgs
)
...
...
src/Gargantext/API/Ngrams.hs
View file @
9e3e9e0f
...
...
@@ -968,9 +968,9 @@ putListNgrams' :: (HasInvalidError err, RepoCmdM env err m)
->
Map
NgramsTerm
NgramsRepoElement
->
m
()
putListNgrams'
nodeId
ngramsType
ns
=
do
printDebug
"[putListNgrams'] nodeId"
nodeId
printDebug
"[putListNgrams'] ngramsType"
ngramsType
printDebug
"[putListNgrams'] ns"
ns
--
printDebug "[putListNgrams'] nodeId" nodeId
--
printDebug "[putListNgrams'] ngramsType" ngramsType
--
printDebug "[putListNgrams'] ns" ns
let
p1
=
NgramsTablePatch
.
PM
.
fromMap
$
NgramsReplace
Nothing
.
Just
<$>
ns
(
p0
,
p0_validity
)
=
PM
.
singleton
nodeId
p1
...
...
src/Gargantext/Core/Text/List.hs
View file @
9e3e9e0f
...
...
@@ -14,9 +14,9 @@ Portability : POSIX
module
Gargantext.Core.Text.List
where
-- import Data.Either (partitionEithers, Either(..)
)
import
Control.Lens
(
makeLenses
,
set
)
import
Data.
Maybe
(
fromMaybe
)
import
Control.Lens
(
makeLenses
)
import
Data.Maybe
(
fromMaybe
,
catMaybes
)
import
Data.
Ord
(
Down
(
..
)
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
...
...
@@ -29,9 +29,9 @@ import qualified Data.Text as Text
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
NgramsTerm
(
..
),
RootParent
(
..
),
mSetFromList
)
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
)
)
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Core.Text.Metrics
(
scored'
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
)
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
...
...
@@ -117,12 +117,14 @@ buildNgramsTermsList l n m s uCid mCid = do
-- Computing global speGen score
allTerms
<-
Map
.
toList
<$>
getTficf
uCid
mCid
NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
-- First remove stops terms
let
(
stopTerms
,
candidateTerms
)
=
List
.
partition
((
isStopTerm
s
)
.
fst
)
allTerms
-- stopTerms ignored for now (need to be tagged already)
(
_stopTerms
,
candidateTerms
)
=
List
.
partition
((
isStopTerm
s
)
.
fst
)
allTerms
-- Grouping the ngrams and keeping the maximum score for label
let
grouped
=
groupStems'
...
...
@@ -134,22 +136,29 @@ buildNgramsTermsList l n m s uCid mCid = do
(
groupedMono
,
groupedMult
)
=
Map
.
partition
(
\
gt
->
_gt_size
gt
<
2
)
grouped
-- printDebug "groupedMult" groupedMult
-- splitting monterms and multiterms to take proportional candidates
let
listSizeGlobal
=
2000
::
Double
-- use % of list if to big, or Int if to small
monoSizeGlobal
=
0.6
::
Double
multSizeGlobal
=
1
-
monoSizeGlobal
monoSize
=
0.4
::
Double
multSize
=
1
-
monoSize
splitAt
n'
ns
=
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
splitAt
n
ns
=
List
.
splitAt
(
round
$
n
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSize
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSize
groupedMult
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSizeGlobal
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSizeGlobal
groupedMult
printDebug
"groupedMonoHead"
(
List
.
length
groupedMonoHead
)
printDebug
"groupedMonoTail"
(
List
.
length
groupedMonoHead
)
printDebug
"groupedMultHead"
(
List
.
length
groupedMultHead
)
printDebug
"groupedMultTail"
(
List
.
length
groupedMultTail
)
let
-- Get Local Scores now for selected grouped ngrams
selectedTerms
=
Set
.
toList
$
List
.
foldl'
(
\
set
(
GroupedText
_
l
_
g
_
_
_
)
->
Set
.
union
set
(
\
set
'
(
GroupedText
_
l'
_
g
_
_
_
)
->
Set
.
union
set'
$
Set
.
union
g
$
Set
.
singleton
l
$
Set
.
singleton
l'
)
Set
.
empty
(
groupedMonoHead
<>
groupedMultHead
)
...
...
@@ -176,50 +185,84 @@ buildNgramsTermsList l n m s uCid mCid = do
$
Map
.
keys
mapTextDocIds
-- compute cooccurrences
mapCooc
=
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
mapCooc
=
Map
.
f
ilter
(
>
2
)
$
Map
.
f
romList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
,
t1
/=
t2
-- Null Diagonal
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
(
_gt_nodes
)
contextsAdded
-- printDebug "mapCooc" mapCooc
let
-- computing scores
scores
=
scored'
mapCooc
-- dilate scores
-- sort / filter
mapScores
f
=
Map
.
fromList
$
map
(
\
(
Scored
t
g
s'
)
->
(
t
,
f
(
g
,
s'
)))
$
scored'
mapCooc
groupsWithScores
=
catMaybes
$
map
(
\
(
stem
,
g
)
->
case
Map
.
lookup
stem
mapScores'
of
Nothing
->
Nothing
Just
s'
->
Just
$
g
{
_gt_score
=
s'
}
)
$
Map
.
toList
contextsAdded
where
mapScores'
=
mapScores
adapt1
-- identity
adapt1
(
s1
,
s2
)
=
(
log'
5
s1
,
log'
2
s2
)
log'
n'
x
=
1
+
(
if
x
<=
0
then
0
else
log
$
(
10
^
(
n'
::
Int
))
*
x
)
-- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores
let
-- sort / partition / split
-- filter mono/multi again
(
monoScored
,
multScored
)
=
List
.
partition
(
\
g
->
_gt_size
g
<
2
)
groupsWithScores
-- filter with max score
partitionWithMaxScore
=
List
.
partition
(
\
g
->
let
(
s1
,
s2
)
=
_gt_score
g
in
s1
>
s2
)
(
monoScoredIncl
,
monoScoredExcl
)
=
partitionWithMaxScore
monoScored
(
multScoredIncl
,
multScoredExcl
)
=
partitionWithMaxScore
multScored
-- splitAt
let
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidateTerms
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSizeGlobal
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSizeGlobal
)
multi
listSizeLocal
=
1000
::
Double
-- use % of list if to big, or Int if to small
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
List
.
splitAt
(
round
$
n'
*
listSizeLocal
)
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
monoScoredExcl
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
_gt_score
)
multScoredExcl
-- Computing local speGen score
listSizeLocal
=
350
::
Double
-- Final Step building the Typed list
termList
=
(
map
(
toGargList
$
Just
StopTerm
)
stopTerms
)
<>
(
map
(
toGargList
$
Just
MapTerm
)
(
monoHead
<>
multiHead
))
<>
(
map
(
toGargList
$
Just
CandidateTerm
)
(
monoTail
<>
multiTail
))
-- (map (toGargList $ Just StopTerm) stopTerms) -- Removing stops (needs social score)
termListHead
=
(
map
(
\
g
->
g
{
_gt_listType
=
Just
MapTerm
}
)
(
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
)
)
<>
(
map
(
\
g
->
g
{
_gt_listType
=
Just
CandidateTerm
})
(
monoScoredInclTail
<>
monoScoredExclTail
<>
multScoredInclTail
<>
multScoredExclTail
)
)
termListTail
=
map
(
\
g
->
g
{
_gt_listType
=
Just
CandidateTerm
})
(
groupedMonoTail
<>
groupedMultTail
)
-- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail
--
printDebug
"multScoredInclHead"
multScoredInclHead
printDebug
"multScoredExclTail"
multScoredExclTail
ngs
=
List
.
concat
$
map
toNgramsElement
$
groupStems
$
map
(
\
(
listType
,
(
t
,
d
))
->
let
stem
=
ngramsGroup
l
n
m
t
in
(
stem
,
GroupedText
listType
t
d
Set
.
empty
(
size
t
)
stem
Set
.
empty
pure
$
Map
.
fromList
[(
NgramsTerms
,
(
List
.
concat
$
map
toNgramsElement
$
termListHead
)
<>
(
List
.
concat
$
map
toNgramsElement
$
termListTail
)
)
)
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
]
groupStems
::
[(
Stem
,
GroupedText
Double
)]
->
[
GroupedText
Double
]
groupStems
=
Map
.
elems
.
groupStems'
...
...
@@ -239,7 +282,7 @@ groupStems' = Map.fromListWith grouping
toNgramsElement
::
GroupedText
Double
->
[
NgramsElement
]
toNgramsElement
::
GroupedText
a
->
[
NgramsElement
]
toNgramsElement
(
GroupedText
listType
label
_
setNgrams
_
_
_
)
=
[
parentElem
]
<>
childrenElems
where
...
...
@@ -278,7 +321,8 @@ data GroupedText score =
,
_gt_stem
::
!
Stem
,
_gt_nodes
::
!
(
Set
NodeId
)
}
instance
Show
score
=>
Show
(
GroupedText
score
)
where
show
(
GroupedText
_
l
s
_
_
_
_
)
=
show
l
<>
":"
<>
show
s
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
(
==
)
(
GroupedText
_
_
score1
_
_
_
_
)
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
9e3e9e0f
...
...
@@ -18,18 +18,13 @@ module Gargantext.Core.Text.Metrics
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
--import GHC.Float (exp)
import
Data.Tuple.Extra
(
both
)
import
Data.Map
(
Map
)
import
Data.List.Extra
(
sortOn
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.Graph.Distances.Matrice
import
Gargantext.Core.Viz.Graph.Index
import
Gargantext.Core.Statistics
(
pcaReduceTo
,
Dimension
(
..
))
import
qualified
Data.Array.Accelerate
as
DAA
import
qualified
Data.Array.Accelerate.Interpreter
as
DAA
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
Vec
...
...
@@ -37,14 +32,6 @@ import qualified Data.Vector.Storable as Vec
type
MapListSize
=
Int
type
InclusionSize
=
Int
{-
toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored' = map2scored
. (pcaReduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
-}
scored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
scored
=
map2scored
.
(
pcaReduceTo
(
Dimension
2
))
.
scored2map
where
...
...
@@ -61,6 +48,7 @@ data Scored ts = Scored
,
_scored_speExc
::
!
SpecificityExclusion
}
deriving
(
Show
)
localMetrics'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
t
(
Vec
.
Vector
Double
)
localMetrics'
m
=
Map
.
fromList
$
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
(
t
,
Vec
.
fromList
[
inc
,
spe
]))
(
Map
.
toList
fi
)
...
...
@@ -85,39 +73,3 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (Map.toList fi) score
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
takeScored
::
Ord
t
=>
MapListSize
->
InclusionSize
->
Map
(
t
,
t
)
Int
->
([
t
],[
t
])
takeScored
listSize
incSize
=
both
(
map
_scored_terms
)
.
takeLinear
listSize
incSize
_scored_genInc
_scored_speExc
.
scored
-- | Filter Scored data
-- >>> takeLinear 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
-- [(3,8),(6,5)]
takeLinear
::
(
Ord
b1
,
Ord
b2
)
=>
MapListSize
->
InclusionSize
->
(
a
->
b2
)
->
(
a
->
b1
)
->
[
a
]
->
([
a
],[
a
])
takeLinear
mls
incSize
speGen
incExc
=
(
List
.
splitAt
mls
)
.
List
.
concat
.
map
(
take
$
round
$
(
fromIntegral
mls
::
Double
)
/
(
fromIntegral
incSize
::
Double
)
)
.
map
(
sortOn
speGen
)
.
splitEvery
incSize
.
take
5000
.
takePercent
(
0.70
)
.
sortOn
incExc
takePercent
::
Double
->
[
a
]
->
[
a
]
takePercent
l
xs
=
List
.
take
l'
xs
where
l'
=
round
$
l
*
(
fromIntegral
$
List
.
length
xs
)
splitTake
::
(
Int
,
a
->
Bool
)
->
(
Int
,
a
->
Bool
)
->
[
a
]
->
([
a
],
[
a
])
splitTake
(
a
,
af
)
(
b
,
bf
)
xs
=
(
mpa
<>
mpb
,
ca
<>
cb
)
where
(
mpa
,
ca
)
=
List
.
splitAt
a
$
List
.
filter
af
xs
(
mpb
,
cb
)
=
List
.
splitAt
b
$
List
.
filter
bf
xs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment