Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
5be28fb7
Commit
5be28fb7
authored
May 21, 2021
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a list parser param
parent
6bddaf45
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
68 additions
and
18 deletions
+68
-18
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+46
-12
package.yaml
package.yaml
+2
-0
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+13
-3
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+7
-3
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
5be28fb7
...
@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo
...
@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
-- import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
...
@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist)
...
@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
Data.Time.Clock.POSIX
(
posixSecondsToUTCTime
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.ByteString.Lazy
as
Lazy
...
@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector
...
@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Data.List.Split
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
@@ -83,14 +90,21 @@ toDays y m d = fromIntegral
...
@@ -83,14 +90,21 @@ toDays y m d = fromIntegral
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
toPhyloDate
y
m
d
tu
=
case
tu
of
Epoch
_
_
_
->
y
Year
_
_
_
->
y
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
::
Int
->
Int
->
Int
->
TimeUnit
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
toPhyloDate'
y
m
d
tu
=
case
tu
of
Epoch
_
_
_
->
pack
$
show
$
posixSecondsToUTCTime
$
fromIntegral
y
Year
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Month
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Week
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Day
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
--------------
--------------
...
@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do
...
@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do
(
toPhyloDate'
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
concat
<$>
mapConcurrently
(
\
file
->
<$>
mapConcurrently
(
\
file
->
...
@@ -145,7 +159,7 @@ csvToDocs parser patterns time path =
...
@@ -145,7 +159,7 @@ csvToDocs parser patterns time path =
Csv
limit
->
Vector
.
toList
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
Nothing
[]
[]
...
@@ -153,10 +167,10 @@ csvToDocs parser patterns time path =
...
@@ -153,10 +167,10 @@ csvToDocs parser patterns time path =
Csv'
limit
->
Vector
.
toList
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
...
@@ -178,10 +192,11 @@ fileToDocs' parser path time lst = do
...
@@ -178,10 +192,11 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Epoch
p
s
f
->
(
"time_epochs"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
seaToLabel
::
Config
->
[
Char
]
...
@@ -264,6 +279,25 @@ readPhylo path = do
...
@@ -264,6 +279,25 @@ readPhylo path = do
Right
phylo
->
pure
phylo
Right
phylo
->
pure
phylo
readListV4
::
[
Char
]
->
IO
NgramsList
readListV4
path
=
do
listJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
NgramsList
)
case
listJson
of
Left
err
->
do
putStrLn
err
undefined
Right
listV4
->
pure
listV4
fileToList
::
ListParser
->
FilePath
->
IO
TermList
fileToList
parser
path
=
case
parser
of
V3
->
csvMapTermList
path
V4
->
fromJust
<$>
toTermList
MapTerm
NgramsTerms
<$>
readListV4
path
--------------
--------------
-- | Main | --
-- | Main | --
--------------
--------------
...
@@ -283,7 +317,7 @@ main = do
...
@@ -283,7 +317,7 @@ main = do
Right
config
->
do
Right
config
->
do
printIOMsg
"Parse the corpus"
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
...
...
package.yaml
View file @
5be28fb7
...
@@ -44,6 +44,7 @@ library:
...
@@ -44,6 +44,7 @@ library:
-
Gargantext.API.Node.File
-
Gargantext.API.Node.File
-
Gargantext.API.Ngrams
-
Gargantext.API.Ngrams
-
Gargantext.API.Ngrams.Types
-
Gargantext.API.Ngrams.Types
-
Gargantext.API.Ngrams.Prelude
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.Types
-
Gargantext.API.Admin.Types
...
@@ -59,6 +60,7 @@ library:
...
@@ -59,6 +60,7 @@ library:
-
Gargantext.Database.Query.Table.Node
-
Gargantext.Database.Query.Table.Node
-
Gargantext.Database.Query.Table.Node.UpdateOpaleye
-
Gargantext.Database.Query.Table.Node.UpdateOpaleye
-
Gargantext.Database.Query.Table.NgramsPostag
-
Gargantext.Database.Query.Table.NgramsPostag
-
Gargantext.Database.Schema.Ngrams
-
Gargantext.Database.Prelude
-
Gargantext.Database.Prelude
-
Gargantext.Database.Admin.Trigger.Init
-
Gargantext.Database.Admin.Trigger.Init
-
Gargantext.Database.Admin.Config
-
Gargantext.Database.Admin.Config
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
5be28fb7
...
@@ -53,7 +53,9 @@ data CorpusParser =
...
@@ -53,7 +53,9 @@ data CorpusParser =
Wos
{
_wos_limit
::
Int
}
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv'
{
_csv'_limit
::
Int
}
|
Csv'
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
ListParser
=
V3
|
V4
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
data
SeaElevation
=
Constante
Constante
...
@@ -102,8 +104,12 @@ data Synchrony =
...
@@ -102,8 +104,12 @@ data Synchrony =
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
data
TimeUnit
=
Year
Epoch
{
_epoch_period
::
Int
,
_epoch_step
::
Int
,
_epoch_matchingFrame
::
Int
}
|
Year
{
_year_period
::
Int
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
,
_year_matchingFrame
::
Int
}
...
@@ -145,6 +151,7 @@ data Config =
...
@@ -145,6 +151,7 @@ data Config =
,
listPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
corpusParser
::
CorpusParser
,
listParser
::
ListParser
,
phyloName
::
Text
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
phyloProximity
::
Proximity
...
@@ -166,6 +173,7 @@ defaultConfig =
...
@@ -166,6 +173,7 @@ defaultConfig =
,
listPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
1000
,
corpusParser
=
Csv
1000
,
listParser
=
V3
,
phyloName
=
pack
"Default Phylo"
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
phyloProximity
=
WeightedLogJaccard
10
...
@@ -184,6 +192,8 @@ instance FromJSON Config
...
@@ -184,6 +192,8 @@ instance FromJSON Config
instance
ToJSON
Config
instance
ToJSON
Config
instance
FromJSON
CorpusParser
instance
FromJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
ListParser
instance
ToJSON
ListParser
instance
FromJSON
Proximity
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
instance
FromJSON
SeaElevation
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
5be28fb7
...
@@ -13,7 +13,7 @@ Portability : POSIX
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.String
(
String
)
import
Data.String
(
String
)
...
@@ -162,7 +162,7 @@ toFstDate ds = snd
...
@@ -162,7 +162,7 @@ toFstDate ds = snd
$
head'
"firstDate"
$
head'
"firstDate"
$
sortOn
fst
$
sortOn
fst
$
map
(
\
d
->
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
toLstDate
::
[
Text
]
->
Text
...
@@ -171,12 +171,13 @@ toLstDate ds = snd
...
@@ -171,12 +171,13 @@ toLstDate ds = snd
$
reverse
$
reverse
$
sortOn
fst
$
sortOn
fst
$
map
(
\
d
->
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Epoch
_
_
_
->
"epoch"
Year
_
_
_
->
"year"
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
Week
_
_
_
->
"week"
...
@@ -192,6 +193,7 @@ toTimeScale dates step =
...
@@ -192,6 +193,7 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
getTimeStep
time
=
case
time
of
Epoch
_
s
_
->
s
Year
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Week
_
s
_
->
s
...
@@ -199,6 +201,7 @@ getTimeStep time = case time of
...
@@ -199,6 +201,7 @@ getTimeStep time = case time of
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
getTimePeriod
time
=
case
time
of
Epoch
p
_
_
->
p
Year
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Week
p
_
_
->
p
...
@@ -206,6 +209,7 @@ getTimePeriod time = case time of
...
@@ -206,6 +209,7 @@ getTimePeriod time = case time of
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
getTimeFrame
time
=
case
time
of
Epoch
_
_
f
->
f
Year
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
Week
_
_
f
->
f
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment