Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
480f7bb9
Commit
480f7bb9
authored
Nov 26, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[REFACT] WIP compiling, needs setGroupedTreeWith specific scores.
parent
b5870fb2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
77 deletions
+120
-77
List.hs
src/Gargantext/Core/Text/List.hs
+74
-34
Group.hs
src/Gargantext/Core/Text/List/Group.hs
+18
-36
Prelude.hs
src/Gargantext/Core/Text/List/Group/Prelude.hs
+23
-3
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+5
-4
No files found.
src/Gargantext/Core/Text/List.hs
View file @
480f7bb9
...
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List
where
import
Control.Lens
((
^.
),
view
,
over
)
import
Control.Lens
hiding
(
both
)
-- ((^.), view, over, set, (_1), (_2)
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
Data.Monoid
(
mempty
)
...
...
@@ -25,12 +25,13 @@ import Data.Text (Text)
import
Data.Tuple.Extra
(
both
)
import
Gargantext.API.Ngrams.Types
(
NgramsElement
)
import
Gargantext.API.Ngrams.Types
(
RepoCmdM
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Group
import
Gargantext.Core.Text.List.Group.Prelude
import
Gargantext.Core.Text.List.Group.WithStem
import
Gargantext.Core.Text.List.Social
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
scored_speExc
,
scored_genInc
,
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getNodesByNgramsOnlyUser
)
...
...
@@ -154,9 +155,9 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
$ view flc_scores socialLists'
-}
let
groupedWithList
=
toGroupedTree
groupParams
socialLists'
allTerms
let
groupedWithList
=
toGroupedTree
groupParams
socialLists'
allTerms
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- First remove stops terms
...
...
@@ -167,11 +168,12 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
groupedWithList = map (addListType (invertForw socialLists)) grouped
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-}
(
stopTerms
,
candidateTerms
)
=
Map
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
groupedWithList
(
groupedMono
,
groupedMult
)
=
Map
.
partition
(
\
t
->
t
^.
gt_size
<
2
)
candidateTerms
--
(groupedMono, groupedMult) = Map.partitionWithKey (\t _v -> size t < 2) candidateTerms
(
stopTerms
,
candidateTerms
)
=
Map
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
--
(groupedMono, groupedMult) = Map.partition (\t -> t ^. gt_size < 2) candidateTerms
(
groupedMono
,
groupedMult
)
=
Map
.
partitionWithKey
(
\
t
_v
->
size
t
<
2
)
candidateTerms
-- printDebug "\n * stopTerms * \n" stopTerms
-- splitting monterms and multiterms to take proportional candidates
...
...
@@ -180,7 +182,10 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
monoSize
=
0.4
::
Double
multSize
=
1
-
monoSize
splitAt
n'
ns
=
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sort
$
Map
.
elems
ns
splitAt
n'
ns
=
both
(
Map
.
fromListWith
(
<>
))
$
List
.
splitAt
(
round
$
n'
*
listSizeGlobal
)
$
List
.
sortOn
(
viewScore
.
snd
)
$
Map
.
toList
ns
(
groupedMonoHead
,
groupedMonoTail
)
=
splitAt
monoSize
groupedMono
(
groupedMultHead
,
groupedMultTail
)
=
splitAt
multSize
groupedMult
...
...
@@ -190,6 +195,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- printDebug "groupedMultHead" (List.length groupedMultHead)
-- printDebug "groupedMultTail" (List.length groupedMultTail)
{-
let
-- Get Local Scores now for selected grouped ngrams
-- TODO HasTerms
...
...
@@ -199,8 +205,8 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
)
Set.empty
(groupedMonoHead <> groupedMultHead)
-- selectedTerms = hasTerms (groupedMonoHead <> groupedMultHead)
-}
selectedTerms
=
Set
.
toList
$
hasTerms
(
groupedMonoHead
<>
groupedMultHead
)
-- TO remove (and remove HasNodeError instance)
userListId
<-
defaultList
uCid
masterListId
<-
defaultList
mCid
...
...
@@ -210,6 +216,13 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
nt
selectedTerms
-- TODO
let
groupedTreeScores_SetNodeId
=
setScoresWith
mapTextDocIds
(
groupedMonoHead
<>
groupedMultHead
)
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
let
mapGroups = Map.fromList
$ map (\g -> (g ^. gt_stem, g))
...
...
@@ -226,42 +239,62 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
)
mapGroups
$ Map.keys mapTextDocIds
-}
-- compute cooccurrences
mapCooc
=
Map
.
filter
(
>
2
)
-- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let
mapCooc
=
Map
.
filter
(
>
2
)
$
Map
.
fromList
[
((
t1
,
t2
),
Set
.
size
$
Set
.
intersection
s1
s2
)
|
(
t1
,
s1
)
<-
mapStemNodeIds
,
(
t2
,
s2
)
<-
mapStemNodeIds
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
]
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
(
_gt_nodes
)
contextsAdded
-- printDebug "mapCooc" mapCooc
where
mapStemNodeIds
=
Map
.
toList
$
Map
.
map
viewScores
$
groupedTreeScores_SetNodeId
let
-- computing scores
mapScores
f
=
Map
.
fromList
$
map
(
\
(
Scored
t
g
s'
)
->
(
t
,
f
(
g
,
s'
)
))
$
map
(
\
s
@
(
Scored
t
g
s'
)
->
(
t
,
f
s
))
$
normalizeGlobal
$
map
normalizeLocal
$
scored'
mapCooc
let
-- groupedTreeScores_SpeGen :: GroupedTreeScores (Scored Double)
groupedTreeScores_SpeGen
=
setScoresWith
(
mapScores
identity
)
(
groupedMonoHead
<>
groupedMultHead
)
{-
-- TODO remove
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
groupsWithScores = catMaybes
$ map (\(stem, g)
-> case Map.lookup stem mapScores' of
Nothing -> Nothing
Just
s'
->
Just
$
g
{
_gt_score
=
s'
}
)
$
Map
.
toList
contextsAdded
Just s' ->
set gts'_score s' g
) $ Map.toList
$ view flc_scores
contextsAdded
where
mapScores' = mapScores identity
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- TODO remove
-}
-- adapt2 TOCHECK with DC
-- printDebug "groupsWithScores" groupsWithScores
-- 8<-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-- TODO remove
--}
let
-- sort / partition / split
-- filter mono/multi again
(
monoScored
,
multScored
)
=
List
.
partition
(
\
g
->
_gt_size
g
<
2
)
groupsWithScores
(
monoScored
,
multScored
)
=
Map
.
partitionWithKey
(
\
t
_v
->
size
t
<
2
)
groupedTreeScores_SpeGen
-- (monoScored, multScored) = List.partition (\g -> _gt_size g < 2) groupsWithScores
-- filter with max score
partitionWithMaxScore
=
List
.
partition
(
\
g
->
let
(
s1
,
s2
)
=
viewScore
g
in
s1
>
s2
)
-- partitionWithMaxScore = List.partition (\g -> let (s1,s2) = viewScore g in s1 > s2 )
partitionWithMaxScore
=
Map
.
partition
(
\
g
->
(
view
scored_genInc
$
view
gts'_score
g
)
>
(
view
scored_speExc
$
view
gts'_score
g
)
)
(
monoScoredIncl
,
monoScoredExcl
)
=
partitionWithMaxScore
monoScored
(
multScoredIncl
,
multScoredExcl
)
=
partitionWithMaxScore
multScored
...
...
@@ -271,31 +304,37 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
listSizeLocal
=
1000
::
Double
-- use % of list if to big, or Int if to small
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
List
.
splitAt
(
round
$
n'
*
listSizeLocal
)
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
monoScoredExcl
--splitAt' n' = (both (Map.fromListWith (<>))) . (List.splitAt (round $ n' * listSizeLocal))
splitAt'
n'
=
(
both
(
Map
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
listSizeLocal
))
sortOn
f
=
(
List
.
sortOn
(
Down
.
f
.
_gts'_score
.
snd
))
.
Map
.
toList
--sortOn f = (List.sortOn (Down . (gts'_score))) . Map.toList
-- sort = (List.sortOn (Down . viewScore))
(
monoScoredInclHead
,
monoScoredInclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
(
sortOn
_scored_genInc
)
monoScoredIncl
(
monoScoredExclHead
,
monoScoredExclTail
)
=
splitAt'
(
monoSize
*
inclSize
/
2
)
$
(
sortOn
_scored_speExc
)
monoScoredExcl
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
List
.
sortOn
(
Down
.
viewScore
)
multScoredExcl
(
multScoredInclHead
,
multScoredInclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
(
sortOn
_scored_genInc
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
splitAt'
(
multSize
*
exclSize
/
2
)
$
(
sortOn
_scored_speExc
)
multScoredExcl
-- Final Step building the Typed list
termListHead
=
maps
<>
cands
where
maps
=
setListType
(
Just
MapTerm
)
<$>
monoScoredInclHead
$
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
cands
=
setListType
(
Just
CandidateTerm
)
<$>
monoScoredInclTail
$
monoScoredInclTail
<>
monoScoredExclTail
<>
multScoredInclTail
<>
multScoredExclTail
termListTail
=
map
(
setListType
(
Just
CandidateTerm
))
(
groupedMonoTail
<>
groupedMultTail
)
termListTail
=
(
setListType
(
Just
CandidateTerm
))
(
groupedMonoTail
<>
groupedMultTail
)
-- printDebug "monoScoredInclHead" monoScoredInclHead
-- printDebug "monoScoredExclHead" monoScoredExclTail
...
...
@@ -303,12 +342,13 @@ buildNgramsTermsList user uCid mCid groupParams (nt, mapListSize)= do
-- printDebug "multScoredExclTail" multScoredExclTail
let
result
=
Map
.
unionsWith
(
<>
)
[
Map
.
fromList
[(
nt
,
(
List
.
concat
$
map
toNgramsElement
$
termListHead
)
<>
(
List
.
concat
$
map
toNgramsElement
$
termListTail
)
<>
(
List
.
concat
$
map
toNgramsElement
$
stopTerms
)
[
Map
.
fromList
[(
nt
,
toNgramsElement
termListHead
<>
toNgramsElement
termListTail
<>
toNgramsElement
stopTerms
)]
]
-- printDebug "\n result \n" r
pure
result
...
...
src/Gargantext/Core/Text/List/Group.hs
View file @
480f7bb9
...
...
@@ -22,7 +22,7 @@ import Control.Lens (set, view)
import
Data.Set
(
Set
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Monoid
(
mempty
)
import
Data.Monoid
(
Monoid
,
mempty
)
import
Data.Text
(
Text
)
import
Gargantext.Core.Types
(
ListType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
...
...
@@ -37,44 +37,26 @@ import qualified Data.List as List
------------------------------------------------------------------------
-- | TODO add group with stemming
toGroupedTree
::
(
Ord
a
,
Monoid
a
,
GroupWithStem
a
)
=>
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
a
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
a
)
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
mempty
$
Map
.
lookup
t
scores
class
ToGroupedTree
a
b
|
a
->
b
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
a
->
FlowCont
Text
(
GroupedTreeScores
b
)
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
instance
ToGroupedTree
(
Map
Text
(
Set
NodeId
))
(
Set
NodeId
)
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
(
Set
NodeId
)
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
(
Set
NodeId
))
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
Set
.
empty
$
Map
.
lookup
t
scores
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
instance
ToGroupedTree
(
Map
Text
Double
)
Double
where
toGroupedTree
::
GroupParams
->
FlowCont
Text
FlowListScores
->
Map
Text
Double
-- -> Map Text (GroupedTreeScores (Set NodeId))
->
FlowCont
Text
(
GroupedTreeScores
Double
)
toGroupedTree
groupParams
flc
scores
=
{-view flc_scores-}
flow2
where
flow1
=
groupWithScores'
flc
scoring
scoring
t
=
fromMaybe
mempty
$
Map
.
lookup
t
scores
setScoresWith
::
Map
Text
a
->
Map
Text
(
GroupedTreeScores
b
)
->
Map
Text
(
GroupedTreeScores
a
)
setScoresWith
=
undefined
flow2
=
case
(
view
flc_cont
flow1
)
==
Map
.
empty
of
True
->
flow1
False
->
groupWithStem'
groupParams
flow1
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/List/Group/Prelude.hs
View file @
480f7bb9
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List.Group.Prelude
where
import
Control.Lens
(
makeLenses
,
view
,
set
)
import
Control.Lens
(
makeLenses
,
view
,
set
,
over
)
import
Data.Monoid
import
Data.Semigroup
import
Data.Set
(
Set
)
...
...
@@ -25,6 +25,7 @@ import Data.Maybe (fromMaybe)
import
Data.Map
(
Map
)
import
Gargantext.Core.Types
(
ListType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Core.Text.Metrics
(
scored'
,
Scored
(
..
),
scored_speExc
,
scored_genInc
,
normalizeGlobal
,
normalizeLocal
)
import
Gargantext.API.Ngrams.Types
(
NgramsElement
,
mkNgramsElement
,
NgramsTerm
(
..
),
RootParent
(
..
),
mSetFromList
)
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
...
...
@@ -41,7 +42,7 @@ data GroupedTreeScores score =
,
_gts'_score
::
!
score
}
deriving
(
Show
,
Ord
,
Eq
)
instance
(
Semigroup
a
,
Ord
a
)
=>
Semigroup
(
GroupedTreeScores
a
)
where
instance
(
Semigroup
a
)
=>
Semigroup
(
GroupedTreeScores
a
)
where
(
<>
)
(
GroupedTreeScores
l1
s1
c1
)
(
GroupedTreeScores
l2
s2
c2
)
=
GroupedTreeScores
(
l1
<>
l2
)
...
...
@@ -62,12 +63,14 @@ class ViewListType a where
class
SetListType
a
where
setListType
::
Maybe
ListType
->
a
->
a
------
class
Ord
b
=>
ViewScore
a
b
|
a
->
b
where
viewScore
::
a
->
b
class
ViewScores
a
b
|
a
->
b
where
viewScores
::
a
->
b
--------
class
ToNgramsElement
a
where
toNgramsElement
::
a
->
[
NgramsElement
]
...
...
@@ -80,12 +83,24 @@ instance ViewListType (GroupedTreeScores a) where
viewListType
=
view
gts'_listType
instance
SetListType
(
GroupedTreeScores
a
)
where
setListType
=
set
gts'_listType
setListType
lt
g
=
over
gts'_children
(
setListType
lt
)
$
set
gts'_listType
lt
g
instance
SetListType
(
Map
Text
(
GroupedTreeScores
a
))
where
setListType
lt
=
Map
.
map
(
set
gts'_listType
lt
)
------
instance
ViewScore
(
GroupedTreeScores
Double
)
Double
where
viewScore
=
viewScores
instance
ViewScores
(
GroupedTreeScores
Double
)
Double
where
viewScores
g
=
sum
$
parent
:
children
where
parent
=
view
gts'_score
g
children
=
map
viewScores
$
Map
.
elems
$
view
gts'_children
g
instance
ViewScore
(
GroupedTreeScores
(
Set
NodeId
))
Int
where
viewScore
=
Set
.
size
.
viewScores
...
...
@@ -95,6 +110,10 @@ instance ViewScores (GroupedTreeScores (Set NodeId)) (Set NodeId) where
parent
=
view
gts'_score
g
children
=
map
viewScores
$
Map
.
elems
$
view
gts'_children
g
instance
ViewScore
(
GroupedTreeScores
(
Scored
Text
))
Double
where
viewScore
=
view
(
gts'_score
.
scored_genInc
)
------
instance
HasTerms
(
Map
Text
(
GroupedTreeScores
a
))
where
hasTerms
=
Set
.
unions
.
(
map
hasTerms
)
.
Map
.
toList
...
...
@@ -112,6 +131,7 @@ instance HasTerms (Text, GroupedTreeScores a) where
instance
ToNgramsElement
(
Map
Text
(
GroupedTreeScores
a
))
where
toNgramsElement
=
List
.
concat
.
(
map
toNgramsElement
)
.
Map
.
toList
instance
ToNgramsElement
(
Text
,
GroupedTreeScores
a
)
where
toNgramsElement
(
t
,
gts
)
=
parent
:
children
where
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
480f7bb9
...
...
@@ -11,13 +11,14 @@ Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE
BangPatterns
#-}
{-# LANGUAGE
TemplateHaskell
#-}
module
Gargantext.Core.Text.Metrics
where
--import Data.Array.Accelerate ((:.)(..), Z(..))
--import Math.KMeans (kmeans, euclidSq, elements)
import
Control.Lens
(
makeLenses
)
import
Data.Map
(
Map
)
import
Gargantext.Prelude
import
Gargantext.Core.Methods.Distances.Accelerate.SpeGen
...
...
@@ -46,7 +47,7 @@ data Scored ts = Scored
{
_scored_terms
::
!
ts
,
_scored_genInc
::
!
GenericityInclusion
,
_scored_speExc
::
!
SpecificityExclusion
}
deriving
(
Show
)
}
deriving
(
Show
,
Eq
,
Ord
)
localMetrics'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
t
(
Vec
.
Vector
Double
)
...
...
@@ -96,5 +97,5 @@ normalizeLocal (Scored t s1 s2) = Scored t (log' 5 s1) (log' 2 s2)
-- | Type Instances
makeLenses
'S
c
ored
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment