Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8b029638
Commit
8b029638
authored
Nov 26, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[REFACT] WIP compiling, needs setGroupedTreeWith specific scores.
parent
226db2c5
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
77 deletions
+120
-77
List.hs
src/Gargantext/Core/Text/List.hs
+74
-34
Group.hs
src/Gargantext/Core/Text/List/Group.hs
+18
-36
Prelude.hs
src/Gargantext/Core/Text/List/Group/Prelude.hs
+23
-3
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+5
-4
No files found.
src/Gargantext/Core/Text/List.hs
View file @
8b029638
...
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List
where
import
Control.Lens
((
^.
),
view
,
over
)
import
Control.Lens
hiding
(
both
)
-- ((^.), view, over, set, (_1), (_2)
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
(
mempty
)
...
...
@@ -25,12 +25,13 @@ import Data.Text (Text)
import
Data.Tuple.Extra
(
both
)
import
Gargantext.API.Ngrams.Types
(
NgramsElement
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Group
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithStem
import
Gargantext.Core.Text.List.Social
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
scored_speExc
,
scored_genInc
,
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getNodesByNgramsOnlyUser
)
...
...
@@ -154,9 +155,9 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
$ view flc_scores socialLists'
-}
let
groupedWithList
=
toGroupedTree
groupParams
socialLists'
allTerms
let
groupedWithList
=
toGroupedTree
groupParams
socialLists'
allTerms
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- First remove stops terms
...
...
@@ -167,11 +168,12 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
groupedWithList = map (addListType (invertForw socialLists)) grouped
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-}
(
stopTerms
,
candidateTerms
)
=
Map
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
groupedWithList
(
groupedMono
,
groupedMult
)
=
Map
.
partition
(
\
t
->
t
^.
gt_size
<
2
)
candidateTerms
--
(groupedMono, groupedMult) = Map.partitionWithKey (\t _v -> size t < 2) candidateTerms
(
stopTerms
,
candidateTerms
)
=
Map
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
--
(groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
(
groupedMono
,
groupedMult
)
=
Map
.
partitionWithKey
(
\
t
_v
->
size
t
<
2
)
candidateTerms
-- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
...
...
@@ -180,7 +182,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
monoSize
=
0.4
::
Double
multSize
=
1
-
monoSize
splitAt
n'
ns
=
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
splitAt
n'
ns
=
both
(
Map
.
fromListWith
(
<>
))
$
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sortOn
(
viewScore
.
snd
)
$
Map
.
toList
ns
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSize
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSize
groupedMult
...
...
@@ -190,6 +195,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- printDebug "groupedMultHead" (List.length groupedMultHead)
-- printDebug "groupedMultTail" (List.length groupedMultTail)
{-
let
-- Get Local Scores now for selected grouped ngrams
-- TODO HasTerms
...
...
@@ -199,8 +205,8 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
)
Set.empty
(groupedMonoHead <> groupedMultHead)
-- selectedTerms = hasTerms (groupedMonoHead <> groupedMultHead)
-}
selectedTerms
=
Set
.
toList
$
hasTerms
(
groupedMonoHead
<>
groupedMultHead
)
-- TO remove (and remove HasNodeError instance)
userListId
<-
defaultList
uCid
masterListId
<-
defaultList
mCid
...
...
@@ -210,6 +216,13 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
nt
selectedTerms
-- TODO
let
groupedTreeScores_SetNodeId
=
setScoresWith
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
let
mapGroups = Map.fromList
$ map (\g -> (g ^. gt_stem, g))
...
...
@@ -226,42 +239,62 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
)
mapGroups
$ Map.keys mapTextDocIds
-}
-- compute cooccurrences
mapCooc
=
Map
.
filter
(
>
2
)
-- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let
mapCooc
=
Map
.
filter
(
>
2
)
$
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
(
_gt_nodes
)
contextsAdded
-- printDebug "mapCooc" mapCooc
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
viewScores
$
groupedTreeScores_SetNodeId
let
-- computing scores
mapScores
f
=
Map
.
fromList
$
map
(
\
(
Scored
t
g
s'
)
->
(
t
,
f
(
g
,
s'
)
))
$
map
(
\
s
@
(
Scored
t
g
s'
)
->
(
t
,
f
s
))
$
normalizeGlobal
$
map
normalizeLocal
$
scored'
mapCooc
let
-- groupedTreeScores_SpeGen :: GroupedTreeScores (Scored Double)
groupedTreeScores_SpeGen
=
setScoresWith
(
mapScores
identity
)
(
groupedMonoHead
<>
groupedMultHead
)
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
groupsWithScores = catMaybes
$ map (\(stem, g)
-> case Map.lookup stem mapScores' of
Nothing -> Nothing
Just
s'
->
Just
$
g
{
_gt_score
=
s'
}
)
$
Map
.
toList
contextsAdded
Just s' ->
set gts'_score s' g
) $ Map.toList
$ view flc_scores
contextsAdded
where
mapScores' = mapScores identity
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- TODO remove
-}
-- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- TODO remove
--}
let
-- sort / partition / split
-- filter mono/multi again
(
monoScored
,
multScored
)
=
List
.
partition
(
\
g
->
_gt_size
g
<
2
)
groupsWithScores
(
monoScored
,
multScored
)
=
Map
.
partitionWithKey
(
\
t
_v
->
size
t
<
2
)
groupedTreeScores_SpeGen
-- (monoScored, multScored) = List.partition (\g -> _gt_size g < 2) groupsWithScores
-- filter with max score
partitionWithMaxScore
=
List
.
partition
(
\
g
->
let
(
s1
,
s2
)
=
viewScore
g
in
s1
>
s2
)
-- partitionWithMaxScore = List.partition (\g -> let (s1,s2) = viewScore g in s1 > s2 )
partitionWithMaxScore
=
Map
.
partition
(
\
g
->
(
view
scored_genInc
$
view
gts'_score
g
)
>
(
view
scored_speExc
$
view
gts'_score
g
)
)
(
monoScoredIncl
,
monoScoredExcl
)
=
partitionWithMaxScore
monoScored
(
multScoredIncl
,
multScoredExcl
)
=
partitionWithMaxScore
multScored
...
...
@@ -271,31 +304,37 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
listSizeLocal
=
1000
::
Double
-- use % of list if to big, or Int if to small
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
List
.
splitAt
(
round
$
n'
*
listSizeLocal
)
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
monoScoredExcl
--splitAt' n' = (both (Map.fromListWith (<>))) . (List.splitAt (round $ n' * listSizeLocal))
splitAt'
n'
=
(
both
(
Map
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
listSizeLocal
))
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
multScoredExcl
sortOn
f
=
(
List
.
sortOn
(
Down
.
f
.
_gts'_score
.
snd
))
.
Map
.
toList
--sortOn f = (List.sortOn (Down . (gts'_score))) . Map.toList
-- sort = (List.sortOn (Down . viewScore))
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
(
sortOn
_scored_genInc
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
(
sortOn
_scored_speExc
)
monoScoredExcl
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
(
sortOn
_scored_genInc
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
(
sortOn
_scored_speExc
)
multScoredExcl
-- Final Step building the Typed list
termListHead
=
maps
<>
cands
where
maps
=
setListType
(
Just
MapTerm
)
<$>
monoScoredInclHead
$
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
cands
=
setListType
(
Just
CandidateTerm
)
<$>
monoScoredInclTail
$
monoScoredInclTail
<>
monoScoredExclTail
<>
multScoredInclTail
<>
multScoredExclTail
termListTail
=
map
(
setListType
(
Just
CandidateTerm
))
(
groupedMonoTail
<>
groupedMultTail
)
termListTail
=
(
setListType
(
Just
CandidateTerm
))
(
groupedMonoTail
<>
groupedMultTail
)
-- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail
...
...
@@ -303,12 +342,13 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- printDebug "multScoredExclTail" multScoredExclTail
let
result
=
Map
.
unionsWith
(
<>
)
[
Map
.
fromList
[(
nt
,
(
List
.
concat
$
map
toNgramsElement
$
termListHead
)
<>
(
List
.
concat
$
map
toNgramsElement
$
termListTail
)
<>
(
List
.
concat
$
map
toNgramsElement
$
stopTerms
)
[
Map
.
fromList
[(
nt
,
toNgramsElement
termListHead
<>
toNgramsElement
termListTail
<>
toNgramsElement
stopTerms
)]
]
-- printDebug "\n result \n" r
pure
result
...
...
src/Gargantext/Core/Text/List/Group.hs
View file @
8b029638
...
...
@@ -22,7 +22,7 @@ import Control.Lens (set, view)
import
Data.Set
(
Set
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Monoid
(
mempty
)
import
Data.Monoid
(
Monoid
,
mempty
)
import
Data.Text
(
Text
)
import
Gargantext.Core.Types
(
ListType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
...
...
@@ -37,44 +37,26 @@ import qualified Data.List as List
------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTree
::
(
Ord
a
,
Monoid
a
,
GroupWithStem
a
)
=>
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
a
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
a
)
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
mempty
$
Map
.
lookup
t
scores
class
ToGroupedTree
a
b
|
a
->
b
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
a
->
FlowCont
Text
(
GroupedTreeScores
b
)
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
instance
ToGroupedTree
(
Map
Text
(
Set
NodeId
))
(
Set
NodeId
)
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
(
Set
NodeId
)
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
(
Set
NodeId
))
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
Set
.
empty
$
Map
.
lookup
t
scores
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
instance
ToGroupedTree
(
Map
Text
Double
)
Double
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
Double
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
Double
)
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
mempty
$
Map
.
lookup
t
scores
setScoresWith
::
Map
Text
a
->
Map
Text
(
GroupedTreeScores
b
)
->
Map
Text
(
GroupedTreeScores
a
)
setScoresWith
=
undefined
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/List/Group/Prelude.hs
View file @
8b029638
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List.Group.Prelude
where
import
Control.Lens
(
makeLenses
,
view
,
set
)
import
Control.Lens
(
makeLenses
,
view
,
set
,
over
)
import
Data.Monoid
import
Data.Semigroup
import
Data.Set
(
Set
)
...
...
@@ -25,6 +25,7 @@ import Data.Maybe (fromMaybe)
import
Data.Map
(
Map
)
import
Gargantext.Core.Types
(
ListType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
scored_speExc
,
scored_genInc
,
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.API.Ngrams.Types
(
NgramsElement
,
mkNgramsElement
,
NgramsTerm
(
..
),
RootParent
(
..
),
mSetFromList
)
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
...
...
@@ -41,7 +42,7 @@ data GroupedTreeScores score =
,
_gts'_score
::
!
score
}
deriving
(
Show
,
Ord
,
Eq
)
instance
(
Semigroup
a
,
Ord
a
)
=>
Semigroup
(
GroupedTreeScores
a
)
where
instance
(
Semigroup
a
)
=>
Semigroup
(
GroupedTreeScores
a
)
where
(
<>
)
(
GroupedTreeScores
l1
s1
c1
)
(
GroupedTreeScores
l2
s2
c2
)
=
GroupedTreeScores
(
l1
<>
l2
)
...
...
@@ -62,12 +63,14 @@ class ViewListType a where
class
SetListType
a
where
setListType
::
Maybe
ListType
->
a
->
a
------
class
Ord
b
=>
ViewScore
a
b
|
a
->
b
where
viewScore
::
a
->
b
class
ViewScores
a
b
|
a
->
b
where
viewScores
::
a
->
b
--------
class
ToNgramsElement
a
where
toNgramsElement
::
a
->
[
NgramsElement
]
...
...
@@ -80,12 +83,24 @@ instance ViewListType (GroupedTreeScores a) where
viewListType
=
view
gts'_listType
instance
SetListType
(
GroupedTreeScores
a
)
where
setListType
=
set
gts'_listType
setListType
lt
g
=
over
gts'_children
(
setListType
lt
)
$
set
gts'_listType
lt
g
instance
SetListType
(
Map
Text
(
GroupedTreeScores
a
))
where
setListType
lt
=
Map
.
map
(
set
gts'_listType
lt
)
------
instance
ViewScore
(
GroupedTreeScores
Double
)
Double
where
viewScore
=
viewScores
instance
ViewScores
(
GroupedTreeScores
Double
)
Double
where
viewScores
g
=
sum
$
parent
:
children
where
parent
=
view
gts'_score
g
children
=
map
viewScores
$
Map
.
elems
$
view
gts'_children
g
instance
ViewScore
(
GroupedTreeScores
(
Set
NodeId
))
Int
where
viewScore
=
Set
.
size
.
viewScores
...
...
@@ -95,6 +110,10 @@ instance ViewScores (GroupedTreeScores (Set NodeId)) (Set NodeId) where
parent
=
view
gts'_score
g
children
=
map
viewScores
$
Map
.
elems
$
view
gts'_children
g
instance
ViewScore
(
GroupedTreeScores
(
Scored
Text
))
Double
where
viewScore
=
view
(
gts'_score
.
scored_genInc
)
------
instance
HasTerms
(
Map
Text
(
GroupedTreeScores
a
))
where
hasTerms
=
Set
.
unions
.
(
map
hasTerms
)
.
Map
.
toList
...
...
@@ -112,6 +131,7 @@ instance HasTerms (Text, GroupedTreeScores a) where
instance
ToNgramsElement
(
Map
Text
(
GroupedTreeScores
a
))
where
toNgramsElement
=
List
.
concat
.
(
map
toNgramsElement
)
.
Map
.
toList
instance
ToNgramsElement
(
Text
,
GroupedTreeScores
a
)
where
toNgramsElement
(
t
,
gts
)
=
parent
:
children
where
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
8b029638
...
...
@@ -11,13 +11,14 @@ Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE
BangPatterns
#-}
{-# LANGUAGE
TemplateHaskell
#-}
module
Gargantext.Core.Text.Metrics
where
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
import
Control.Lens
(
makeLenses
)
import
Data.Map
(
Map
)
import
Gargantext.Prelude
import
Gargantext.Core.Methods.Distances.Accelerate.SpeGen
...
...
@@ -46,7 +47,7 @@ data Scored ts = Scored
{
_scored_terms
::
!
ts
,
_scored_genInc
::
!
GenericityInclusion
,
_scored_speExc
::
!
SpecificityExclusion
}
deriving
(
Show
)
}
deriving
(
Show
,
Eq
,
Ord
)
localMetrics'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
t
(
Vec
.
Vector
Double
)
...
...
@@ -96,5 +97,5 @@ normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
-- | Type Instances
makeLenses
'S
c
ored
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment