Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
01ad91a8
Commit
01ad91a8
authored
Sep 19, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Doc/Comments
parent
5229f873
Pipeline
#3184
passed with stage
in 92 minutes and 18 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
62 additions
and
62 deletions
+62
-62
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+2
-2
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+60
-60
No files found.
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
01ad91a8
...
@@ -87,9 +87,9 @@ phylo2dot2json phylo = do
...
@@ -87,9 +87,9 @@ phylo2dot2json phylo = do
flowPhyloAPI
::
PhyloConfig
->
CorpusId
->
GargNoServer
Phylo
flowPhyloAPI
::
PhyloConfig
->
CorpusId
->
GargNoServer
Phylo
flowPhyloAPI
config
cId
=
do
flowPhyloAPI
config
cId
=
do
(
mapList
,
corpus
)
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
(
mapList
,
corpus
)
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
phyloWithCliqu
es
<-
pure
$
toPhyloStep
corpus
mapList
config
temporalSeri
es
<-
pure
$
toPhyloStep
corpus
mapList
config
-- writePhylo phyloWithCliquesFile phyloWithCliques
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliqu
es
)
pure
$
toPhylo
(
setConfig
config
temporalSeri
es
)
--------------------------------------------------------------------
--------------------------------------------------------------------
corpusIdtoDocuments
::
TimeUnit
->
CorpusId
->
GargNoServer
(
TermList
,
[
Document
])
corpusIdtoDocuments
::
TimeUnit
->
CorpusId
->
GargNoServer
(
TermList
,
[
Document
])
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
01ad91a8
...
@@ -44,8 +44,8 @@ data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
...
@@ -44,8 +44,8 @@ data Phylo' = PhyloBase { _phylo'_phyloBase :: Phylo}
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
toPhylo' :: Phylo' -> [Document] -> TermList -> PhyloConfig -> Phylo
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloN phylo) = toPhylo'
toPhylo' (PhyloBase phylo) = toPhylo
toPhylo' (PhyloBase phylo) = toPhylo
-}
-}
...
@@ -54,11 +54,11 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
...
@@ -54,11 +54,11 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
$
traceToPhylo
(
phyloLevel
$
getConfig
phyloStep
)
$
$
traceToPhylo
(
phyloLevel
$
getConfig
phyloStep
)
$
if
(
phyloLevel
$
getConfig
phyloStep
)
>
1
if
(
phyloLevel
$
getConfig
phyloStep
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phyloLevel
$
getConfig
phyloStep
)]
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phyloAncestors
[
2
..
(
phyloLevel
$
getConfig
phyloStep
)]
else
phylo1
else
phylo1
where
where
--------------------------------------
--------------------------------------
phyloAncestors
::
Phylo
phyloAncestors
::
Phylo
phyloAncestors
=
phyloAncestors
=
if
(
findAncestors
$
getConfig
phyloStep
)
if
(
findAncestors
$
getConfig
phyloStep
)
then
toHorizon
phylo1
then
toHorizon
phylo1
else
phylo1
else
phylo1
...
@@ -73,31 +73,31 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
...
@@ -73,31 +73,31 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
--------------------
--------------------
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
lvl
phylo
=
toGroupsProxi
lvl
phylo
=
let
proximity
=
phyloProximity
$
getConfig
phylo
let
proximity
=
phyloProximity
$
getConfig
phylo
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
elems
$
view
(
phylo_periodLevels
$
view
(
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
pds
.
phylo_levelGroups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
-- 2) compute the pairs in parallel
-- 2) compute the pairs in parallel
pairs
=
map
(
\
(
id
,
ngrams
)
->
pairs
=
map
(
\
(
id
,
ngrams
)
->
map
(
\
(
id'
,
ngrams'
)
->
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
toProximity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
in
((
id
,
id'
),
toProximity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
in
acc
++
(
concat
pairs'
)
)
[]
$
phylo
^.
phylo_periods
)
[]
$
phylo
^.
phylo_periods
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
...
@@ -109,17 +109,17 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -109,17 +109,17 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
pId'
=
phyloLvl
^.
phylo_levelPeriod'
pId'
=
phyloLvl
^.
phylo_levelPeriod'
phyloCUnit
=
m
!
pId
phyloCUnit
=
m
!
pId
in
phyloLvl
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
]
)
[]
phyloCUnit
)
else
else
phyloLvl
)
phyloLvl
)
phylo
phylo
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
...
@@ -135,22 +135,22 @@ cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
...
@@ -135,22 +135,22 @@ cliqueToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
toPhylo1
::
Phylo
->
Phylo
toPhylo1
::
Phylo
->
Phylo
toPhylo1
phyloStep
=
case
(
getSeaElevation
phyloStep
)
of
toPhylo1
phyloStep
=
case
(
getSeaElevation
phyloStep
)
of
Constante
start
gap
->
constanteTemporalMatching
start
gap
phyloStep
Constante
start
gap
->
constanteTemporalMatching
start
gap
phyloStep
Adaptative
steps
->
adaptativeTemporalMatching
steps
phyloStep
Adaptative
steps
->
adaptativeTemporalMatching
steps
phyloStep
-----------------------
-----------------------
-- | To Phylo Step | --
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates'
::
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
(
Text
,
Text
)
indexDates'
::
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
(
Text
,
Text
)
indexDates'
m
=
map
(
\
docs
->
indexDates'
m
=
map
(
\
docs
->
let
ds
=
map
(
\
d
->
date'
d
)
docs
let
ds
=
map
(
\
d
->
date'
d
)
docs
f
=
if
(
null
ds
)
f
=
if
(
null
ds
)
then
""
then
""
else
toFstDate
ds
else
toFstDate
ds
l
=
if
(
null
ds
)
l
=
if
(
null
ds
)
then
""
then
""
else
toLstDate
ds
else
toLstDate
ds
in
(
f
,
l
))
m
in
(
f
,
l
))
m
...
@@ -159,9 +159,9 @@ indexDates' m = map (\docs ->
...
@@ -159,9 +159,9 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms
-- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique
-- QL: backend entre phyloBase et phyloClique
toPhyloStep
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloStep
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
toGroupsProxi
1
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
$
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
where
--------------------------------------
--------------------------------------
...
@@ -200,23 +200,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >
...
@@ -200,23 +200,23 @@ filterCliqueBySize thr l = filter (\clq -> (length $ clq ^. phyloClique_nodes) >
-- To filter nested Fis
-- To filter nested Fis
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterCliqueByNested
m
=
filterCliqueByNested
m
=
let
clq
=
map
(
\
l
->
let
clq
=
map
(
\
l
->
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
phyloClique_nodes
)
(
f
^.
phyloClique_nodes
))
mem
)
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
f'
^.
phyloClique_nodes
)
(
f
^.
phyloClique_nodes
))
mem
)
then
mem
then
mem
else
else
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
f
^.
phyloClique_nodes
)
(
f'
^.
phyloClique_nodes
))
mem
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
f
^.
phyloClique_nodes
)
(
f'
^.
phyloClique_nodes
))
mem
in
fMax
++
[
f
]
)
[]
l
)
in
fMax
++
[
f
]
)
[]
l
)
$
elems
m
$
elems
m
clq'
=
clq
`
using
`
parList
rdeepseq
clq'
=
clq
`
using
`
parList
rdeepseq
in
fromList
$
zip
(
keys
m
)
clq'
in
fromList
$
zip
(
keys
m
)
clq'
-- | To transform a time map of docs into a time map of Fis with some filters
-- | To transform a time map of docs into a time map of Fis with some filters
toPhyloClique
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
toPhyloClique
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
toPhyloClique
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
toPhyloClique
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
Fis
s
s'
->
-- traceFis "Filtered Fis"
Fis
s
s'
->
-- traceFis "Filtered Fis"
filterCliqueByNested
filterCliqueByNested
{- \$ traceFis "Filtered by clique size" -}
{- \$ traceFis "Filtered by clique size" -}
$
filterClique
True
s'
(
filterCliqueBySize
)
$
filterClique
True
s'
(
filterCliqueBySize
)
{- \$ traceFis "Filtered by support" -}
{- \$ traceFis "Filtered by support" -}
...
@@ -226,33 +226,33 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -226,33 +226,33 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
MaxClique
s
_
_
->
filterClique
True
s
(
filterCliqueBySize
)
MaxClique
s
_
_
->
filterClique
True
s
(
filterCliqueBySize
)
phyloClique
phyloClique
where
where
--------------------------------------
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
Fis
_
_
->
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
case
(
corpusParser
$
getConfig
phylo
)
of
Csv'
_
->
let
lst
=
toList
Csv'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
_
->
let
lst
=
toList
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
(
Just
$
fromIntegral
$
snd
f
)
[]
)
lst
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
(
Just
$
fromIntegral
$
snd
f
)
[]
)
lst
)
)
)
$
toList
phyloDocs
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
fis'
=
fis
`
using
`
parList
rdeepseq
in
fromList
fis'
in
fromList
fis'
MaxClique
_
thr
filterType
->
MaxClique
_
thr
filterType
->
let
mcl
=
map
(
\
(
prd
,
docs
)
->
let
mcl
=
map
(
\
(
prd
,
docs
)
->
let
cooc
=
map
round
let
cooc
=
map
round
$
foldl
sumCooc
empty
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
in
fromList
mcl'
--------------------------------------
--------------------------------------
-- dev viz graph maxClique getMaxClique
-- dev viz graph maxClique getMaxClique
...
@@ -262,9 +262,9 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -262,9 +262,9 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
--------------------
--------------------
-- To transform the docs into a time map of coocurency matrix
-- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
Cooc
docsToTimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
Cooc
docsToTimeScaleCooc
docs
fdt
=
docsToTimeScaleCooc
docs
fdt
=
let
mCooc
=
fromListWith
sumCooc
let
mCooc
=
fromListWith
sumCooc
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
...
@@ -282,8 +282,8 @@ docsToTimeScaleCooc docs fdt =
...
@@ -282,8 +282,8 @@ docsToTimeScaleCooc docs fdt =
groupDocsByPeriodRec
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriodRec
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriodRec
f
prds
docs
acc
=
groupDocsByPeriodRec
f
prds
docs
acc
=
if
((
null
prds
)
||
(
null
docs
))
if
((
null
prds
)
||
(
null
docs
))
then
acc
then
acc
else
else
let
prd
=
head'
"groupBy"
prds
let
prd
=
head'
"groupBy"
prds
docs'
=
partition
(
\
d
->
(
f
d
>=
fst
prd
)
&&
(
f
d
<=
snd
prd
))
docs
docs'
=
partition
(
\
d
->
(
f
d
>=
fst
prd
)
&&
(
f
d
<=
snd
prd
))
docs
in
groupDocsByPeriodRec
f
(
tail
prds
)
(
snd
docs'
)
(
insert
prd
(
fst
docs'
)
acc
)
in
groupDocsByPeriodRec
f
(
tail
prds
)
(
snd
docs'
)
(
insert
prd
(
fst
docs'
)
acc
)
...
@@ -295,7 +295,7 @@ groupDocsByPeriod' f pds docs =
...
@@ -295,7 +295,7 @@ groupDocsByPeriod' f pds docs =
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
periods
=
map
(
inPeriode
f
docs'
)
pds
periods
=
map
(
inPeriode
f
docs'
)
pds
periods'
=
periods
`
using
`
parList
rdeepseq
periods'
=
periods
`
using
`
parList
rdeepseq
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
)
$
fromList
$
zip
pds
periods'
$
fromList
$
zip
pds
periods'
where
where
--------------------------------------
--------------------------------------
...
@@ -312,14 +312,14 @@ groupDocsByPeriod f pds es =
...
@@ -312,14 +312,14 @@ groupDocsByPeriod f pds es =
let
periods
=
map
(
inPeriode
f
es
)
pds
let
periods
=
map
(
inPeriode
f
es
)
pds
periods'
=
periods
`
using
`
parList
rdeepseq
periods'
=
periods
`
using
`
parList
rdeepseq
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
es
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
es
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
)
$
fromList
$
zip
pds
periods'
$
fromList
$
zip
pds
periods'
where
where
--------------------------------------
--------------------------------------
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
inPeriode
f'
h
(
start
,
end
)
=
fst
$
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
fst
$
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
--------------------------------------
--------------------------------------
docsToTermFreq
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermFreq
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
...
@@ -327,46 +327,46 @@ docsToTermFreq docs fdt =
...
@@ -327,46 +327,46 @@ docsToTermFreq docs fdt =
let
nbDocs
=
fromIntegral
$
length
docs
let
nbDocs
=
fromIntegral
$
length
docs
freqs
=
map
(
/
(
nbDocs
))
freqs
=
map
(
/
(
nbDocs
))
$
fromList
$
fromList
$
map
(
\
lst
->
(
head'
"docsToTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
map
(
\
lst
->
(
head'
"docsToTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
docs
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
docs
sumFreqs
=
sum
$
elems
freqs
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
in
map
(
/
sumFreqs
)
freqs
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
nbDocs
=
fromIntegral
$
length
$
filter
(
\
d
->
elem
(
date
d
)
last
)
docs
nbDocs
=
fromIntegral
$
length
$
filter
(
\
d
->
elem
(
date
d
)
last
)
docs
freqs
=
map
(
/
(
nbDocs
))
freqs
=
map
(
/
(
nbDocs
))
$
fromList
$
fromList
$
map
(
\
lst
->
(
head'
"docsToLastTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
map
(
\
lst
->
(
head'
"docsToLastTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
$
filter
(
\
d
->
elem
(
date
d
)
last
)
docs
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
$
filter
(
\
d
->
elem
(
date
d
)
last
)
docs
sumFreqs
=
sum
$
elems
freqs
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
in
map
(
/
sumFreqs
)
freqs
-- To count the number of docs by unit of time
-- To count the number of docs by unit of time
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
docs
=
docsToTimeScaleNb
docs
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
(
+
)
time
docs'
$
unionWith
(
+
)
time
docs'
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
lvlMax
pId
=
initPhyloLevels
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
-- To init the basic elements of a Phylo
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
PhyloConfig
->
Phylo
toPhyloBase
docs
lst
conf
=
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
$
Phylo
foundations
docsSources
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment