Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
5be28fb7
Commit
5be28fb7
authored
May 21, 2021
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a list parser param
parent
6bddaf45
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
68 additions
and
18 deletions
+68
-18
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+46
-12
package.yaml
package.yaml
+2
-0
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+13
-3
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+7
-3
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
5be28fb7
...
...
@@ -34,7 +34,11 @@ import Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
-- import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
),
toInteger
)
...
...
@@ -43,6 +47,7 @@ import System.Directory (listDirectory,doesFileExist)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
Data.Time.Clock.POSIX
(
posixSecondsToUTCTime
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
...
...
@@ -50,6 +55,8 @@ import qualified Data.Vector as Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
Data.List.Split
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
...
@@ -83,14 +90,21 @@ toDays y m d = fromIntegral
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Epoch
_
_
_
->
y
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
TimeUnit
->
Text
toPhyloDate'
y
m
d
tu
=
case
tu
of
Epoch
_
_
_
->
pack
$
show
$
posixSecondsToUTCTime
$
fromIntegral
y
Year
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Month
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Week
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
Day
_
_
_
->
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
--------------
...
...
@@ -128,7 +142,7 @@ wosToDocs limit patterns time path = do
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
...
...
@@ -145,7 +159,7 @@ csvToDocs parser patterns time path =
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
...
...
@@ -153,10 +167,10 @@ csvToDocs parser patterns time path =
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
...
...
@@ -178,8 +192,9 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Epoch
p
s
f
->
(
"time_epochs"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
...
...
@@ -264,6 +279,25 @@ readPhylo path = do
Right
phylo
->
pure
phylo
readListV4
::
[
Char
]
->
IO
NgramsList
readListV4
path
=
do
listJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
NgramsList
)
case
listJson
of
Left
err
->
do
putStrLn
err
undefined
Right
listV4
->
pure
listV4
fileToList
::
ListParser
->
FilePath
->
IO
TermList
fileToList
parser
path
=
case
parser
of
V3
->
csvMapTermList
path
V4
->
fromJust
<$>
toTermList
MapTerm
NgramsTerms
<$>
readListV4
path
--------------
-- | Main | --
--------------
...
...
@@ -283,7 +317,7 @@ main = do
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
...
...
package.yaml
View file @
5be28fb7
...
...
@@ -44,6 +44,7 @@ library:
-
Gargantext.API.Node.File
-
Gargantext.API.Ngrams
-
Gargantext.API.Ngrams.Types
-
Gargantext.API.Ngrams.Prelude
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.Types
...
...
@@ -59,6 +60,7 @@ library:
-
Gargantext.Database.Query.Table.Node
-
Gargantext.Database.Query.Table.Node.UpdateOpaleye
-
Gargantext.Database.Query.Table.NgramsPostag
-
Gargantext.Database.Schema.Ngrams
-
Gargantext.Database.Prelude
-
Gargantext.Database.Admin.Trigger.Init
-
Gargantext.Database.Admin.Config
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
5be28fb7
...
...
@@ -55,6 +55,8 @@ data CorpusParser =
|
Csv'
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
ListParser
=
V3
|
V4
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
Constante
{
_cons_start
::
Double
...
...
@@ -103,7 +105,11 @@ data Synchrony =
data
TimeUnit
=
Year
Epoch
{
_epoch_period
::
Int
,
_epoch_step
::
Int
,
_epoch_matchingFrame
::
Int
}
|
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
...
...
@@ -145,6 +151,7 @@ data Config =
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
listParser
::
ListParser
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
...
...
@@ -166,6 +173,7 @@ defaultConfig =
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
1000
,
listParser
=
V3
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
...
...
@@ -184,6 +192,8 @@ instance FromJSON Config
instance
ToJSON
Config
instance
FromJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
ListParser
instance
ToJSON
ListParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
5be28fb7
...
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.String
(
String
)
...
...
@@ -162,7 +162,7 @@ toFstDate ds = snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
...
...
@@ -171,12 +171,13 @@ toLstDate ds = snd
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Epoch
_
_
_
->
"epoch"
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
...
...
@@ -192,6 +193,7 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Epoch
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
...
...
@@ -199,6 +201,7 @@ getTimeStep time = case time of
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Epoch
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
...
...
@@ -206,6 +209,7 @@ getTimePeriod time = case time of
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Epoch
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment