Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
349ed2a2
Commit
349ed2a2
authored
Aug 12, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add Wos parser
parent
40da8153
Pipeline
#541
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
170 additions
and
31 deletions
+170
-31
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+44
-5
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+76
-6
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+15
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+21
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+14
-18
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
349ed2a2
...
...
@@ -23,14 +23,16 @@ module Main where
import
Data.Aeson
import
Data.ByteString.Lazy
(
ByteString
)
import
Data.Maybe
()
import
Data.List
(
concat
,
nub
)
import
Data.Maybe
(
isJust
,
fromJust
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
take
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Viz.AdaptativePhylo
...
...
@@ -38,6 +40,8 @@ import Gargantext.Viz.AdaptativePhylo
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
import
System.Environment
import
System.Directory
(
listDirectory
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
...
...
@@ -64,10 +68,28 @@ printIOComment cmt =
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
ByteString
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
...
...
@@ -78,7 +100,25 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
--------------------------------------
-- | To transform a Csv nfile into a readable corpus
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
wosToCorpus
limit
path
=
do
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hyperdataDocument_publication_year
d
title
=
fromJust
$
_hyperdataDocument_title
d
abstr
=
if
(
isJust
$
_hyperdataDocument_abstract
d
)
then
fromJust
$
_hyperdataDocument_abstract
d
else
""
in
(
date'
,
title
<>
" "
<>
abstr
))
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hyperdataDocument_publication_year
d
)
&&
(
isJust
$
_hyperdataDocument_title
d
))
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- | To transform a Csv file into a readable corpus
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
path
=
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -89,8 +129,7 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
limit
path
=
case
parser
of
-- To do Wos from legacy Main.hs
Wos
->
undefined
Wos
->
wosToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
349ed2a2
...
...
@@ -30,8 +30,10 @@ module Gargantext.Viz.AdaptativePhylo where
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
pack
)
import
Data.Vector
(
Vector
)
import
Data.Map
(
Map
)
import
Data.Matrix
(
Matrix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
...
...
@@ -48,7 +50,7 @@ import Control.Lens (makeLenses)
----------------
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
)
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
Config
{
corpusPath
::
FilePath
...
...
@@ -62,9 +64,23 @@ data Config =
,
timeStep
::
Int
,
fisSupport
::
Int
,
fisSize
::
Int
,
branchSize
::
Int
,
safeParall
::
Bool
}
deriving
(
Show
,
Generic
)
,
branchSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
,
corpusLimit
=
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
,
fisSize
=
4
,
branchSize
=
3
}
instance
FromJSON
Config
instance
ToJSON
Config
...
...
@@ -72,6 +88,30 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
-- | Software parameters
data
Software
=
Software
{
_software_name
::
Text
,
_software_version
::
Text
}
deriving
(
Generic
,
Show
,
Eq
)
defaultSoftware
=
Software
{
_software_name
=
pack
"Gargantext"
,
_software_version
=
pack
"v4"
}
-- | Global parameters of a Phylo
data
PhyloParam
=
PhyloParam
{
_phyloParam_version
::
Text
,
_phyloParam_software
::
Software
,
_phyloParam_config
::
Config
}
deriving
(
Generic
,
Show
,
Eq
)
defaultPhyloParam
=
PhyloParam
{
_phyloParam_version
=
pack
"v2.adaptative"
,
_phyloParam_software
=
defaultSoftware
,
_phyloParam_config
=
defaultConfig
}
------------------
-- | Document | --
------------------
...
...
@@ -87,7 +127,7 @@ type Ngrams = Text
data
Document
=
Document
{
date
::
Date
,
text
::
[
Ngrams
]
}
deriving
(
Show
,
Generic
,
NFData
)
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
--------------------
...
...
@@ -102,10 +142,40 @@ data PhyloFoundations = PhyloFoundations
}
deriving
(
Generic
,
Show
,
Eq
)
---------------------------
-- | Coocurency Matrix | --
---------------------------
-- | Cooc : a weighted (Double) coocurency matrix
type
Cooc
=
Matrix
Double
-------------------
-- | Phylomemy | --
-------------------
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
-- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
-- param : the parameters of the phylomemy (with the user's configuration)
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_param
::
PhyloParam
}
deriving
(
Generic
,
Show
,
Eq
)
----------------
-- | Lenses | --
----------------
makeLenses
''
C
onfig
makeLenses
''
P
hyloFoundations
------------------------
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
349ed2a2
...
...
@@ -20,6 +20,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloExample
where
import
Data.List
(
sortOn
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
toLower
)
import
Gargantext.Prelude
...
...
@@ -27,6 +28,7 @@ import Gargantext.Text.Context (TermList)
import
Gargantext.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Control.Lens
...
...
@@ -38,7 +40,19 @@ import qualified Data.Vector as Vector
--------------------------------------------
-- Next is to build the config and the phyloLevel 0
-- cooc et phyloBase
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
=
nbDocsByTime
docs
1
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
fisSupport
=
0
,
fisSize
=
0
}
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
349ed2a2
...
...
@@ -13,4 +13,24 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.PhyloMaker
where
\ No newline at end of file
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
--------------------
-- | to Phylo 0 | --
--------------------
nbDocsByTime
::
[
Document
]
->
Int
->
Map
Date
Double
nbDocsByTime
docs
step
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
step
in
unionWith
(
+
)
time
docs'
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
349ed2a2
...
...
@@ -19,6 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
import
Data.List
(
sort
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
...
...
@@ -28,23 +29,6 @@ import GHC.IO (FilePath)
import
qualified
Data.Vector
as
Vector
----------------
-- | Config | --
----------------
-- | Define a default value
def
::
a
->
Maybe
a
->
a
def
=
fromMaybe
-- | To init a configuration
initConfig
::
Maybe
FilePath
->
Maybe
FilePath
->
Maybe
FilePath
->
Maybe
CorpusParser
->
Maybe
Int
->
Maybe
Text
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Bool
->
Config
initConfig
(
def
""
->
corpus
)
(
def
""
->
mapList
)
(
def
""
->
output
)
(
def
Csv
->
parser
)
(
def
10000
->
limit
)
(
def
"A phylomemy"
->
name
)
(
def
2
->
level
)
(
def
3
->
period
)
(
def
1
->
step
)
(
def
3
->
support
)
(
def
4
->
clique
)
(
def
3
->
minBranchSize
)
(
def
True
->
safe
)
=
Config
corpus
mapList
output
parser
limit
name
level
period
step
support
clique
minBranchSize
safe
---------------------
-- | Foundations | --
---------------------
...
...
@@ -52,4 +36,16 @@ initConfig (def "" -> corpus) (def "" -> mapList) (def "" -> output) (def Csv ->
-- | Is this Ngrams a Foundations Root ?
isRoots
::
Ngrams
->
Vector
Ngrams
->
Bool
isRoots
n
ns
=
Vector
.
elem
n
ns
\ No newline at end of file
isRoots
n
ns
=
Vector
.
elem
n
ns
--------------
-- | Time | --
--------------
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
let
dates'
=
sort
dates
in
[
head'
"toTimeScale"
dates'
,
((
head'
"toTimeScale"
dates'
)
+
step
)
..
last'
"toTimeScale"
dates'
]
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment