Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
349ed2a2
Commit
349ed2a2
authored
Aug 12, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add Wos parser
parent
40da8153
Pipeline
#541
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
170 additions
and
31 deletions
+170
-31
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+44
-5
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+76
-6
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+15
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+21
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+14
-18
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
349ed2a2
...
@@ -23,14 +23,16 @@ module Main where
...
@@ -23,14 +23,16 @@ module Main where
import
Data.Aeson
import
Data.Aeson
import
Data.ByteString.Lazy
(
ByteString
)
import
Data.ByteString.Lazy
(
ByteString
)
import
Data.Maybe
()
import
Data.Maybe
(
isJust
,
fromJust
)
import
Data.List
(
concat
,
nub
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
take
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
...
@@ -38,6 +40,8 @@ import Gargantext.Viz.AdaptativePhylo
...
@@ -38,6 +40,8 @@ import Gargantext.Viz.AdaptativePhylo
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
import
Prelude
(
Either
(
..
))
import
System.Environment
import
System.Environment
import
System.Directory
(
listDirectory
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
...
@@ -64,10 +68,28 @@ printIOComment cmt =
...
@@ -64,10 +68,28 @@ printIOComment cmt =
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
ByteString
readJson
::
FilePath
->
IO
ByteString
readJson
path
=
Lazy
.
readFile
path
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
...
@@ -78,7 +100,25 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
...
@@ -78,7 +100,25 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
--------------------------------------
--------------------------------------
-- | To transform a Csv nfile into a readable corpus
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
wosToCorpus
limit
path
=
do
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hyperdataDocument_publication_year
d
title
=
fromJust
$
_hyperdataDocument_title
d
abstr
=
if
(
isJust
$
_hyperdataDocument_abstract
d
)
then
fromJust
$
_hyperdataDocument_abstract
d
else
""
in
(
date'
,
title
<>
" "
<>
abstr
))
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hyperdataDocument_publication_year
d
)
&&
(
isJust
$
_hyperdataDocument_title
d
))
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- | To transform a Csv file into a readable corpus
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
path
=
Vector
.
toList
csvToCorpus
limit
path
=
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
take
limit
...
@@ -89,8 +129,7 @@ csvToCorpus limit path = Vector.toList
...
@@ -89,8 +129,7 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
limit
path
=
case
parser
of
fileToCorpus
parser
limit
path
=
case
parser
of
-- To do Wos from legacy Main.hs
Wos
->
wosToCorpus
limit
path
Wos
->
undefined
Csv
->
csvToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
349ed2a2
...
@@ -30,8 +30,10 @@ module Gargantext.Viz.AdaptativePhylo where
...
@@ -30,8 +30,10 @@ module Gargantext.Viz.AdaptativePhylo where
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
pack
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Data.Map
(
Map
)
import
Data.Matrix
(
Matrix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -48,7 +50,7 @@ import Control.Lens (makeLenses)
...
@@ -48,7 +50,7 @@ import Control.Lens (makeLenses)
----------------
----------------
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
)
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
data
Config
=
Config
{
corpusPath
::
FilePath
Config
{
corpusPath
::
FilePath
...
@@ -62,9 +64,23 @@ data Config =
...
@@ -62,9 +64,23 @@ data Config =
,
timeStep
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
,
fisSupport
::
Int
,
fisSize
::
Int
,
fisSize
::
Int
,
branchSize
::
Int
,
branchSize
::
Int
,
safeParall
::
Bool
}
deriving
(
Show
,
Generic
,
Eq
)
}
deriving
(
Show
,
Generic
)
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
,
corpusLimit
=
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
,
fisSize
=
4
,
branchSize
=
3
}
instance
FromJSON
Config
instance
FromJSON
Config
instance
ToJSON
Config
instance
ToJSON
Config
...
@@ -72,6 +88,30 @@ instance FromJSON CorpusParser
...
@@ -72,6 +88,30 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
ToJSON
CorpusParser
-- | Software parameters
data
Software
=
Software
{
_software_name
::
Text
,
_software_version
::
Text
}
deriving
(
Generic
,
Show
,
Eq
)
defaultSoftware
=
Software
{
_software_name
=
pack
"Gargantext"
,
_software_version
=
pack
"v4"
}
-- | Global parameters of a Phylo
data
PhyloParam
=
PhyloParam
{
_phyloParam_version
::
Text
,
_phyloParam_software
::
Software
,
_phyloParam_config
::
Config
}
deriving
(
Generic
,
Show
,
Eq
)
defaultPhyloParam
=
PhyloParam
{
_phyloParam_version
=
pack
"v2.adaptative"
,
_phyloParam_software
=
defaultSoftware
,
_phyloParam_config
=
defaultConfig
}
------------------
------------------
-- | Document | --
-- | Document | --
------------------
------------------
...
@@ -87,7 +127,7 @@ type Ngrams = Text
...
@@ -87,7 +127,7 @@ type Ngrams = Text
data
Document
=
Document
data
Document
=
Document
{
date
::
Date
{
date
::
Date
,
text
::
[
Ngrams
]
,
text
::
[
Ngrams
]
}
deriving
(
Show
,
Generic
,
NFData
)
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
--------------------
--------------------
...
@@ -102,10 +142,40 @@ data PhyloFoundations = PhyloFoundations
...
@@ -102,10 +142,40 @@ data PhyloFoundations = PhyloFoundations
}
deriving
(
Generic
,
Show
,
Eq
)
}
deriving
(
Generic
,
Show
,
Eq
)
---------------------------
-- | Coocurency Matrix | --
---------------------------
-- | Cooc : a weighted (Double) coocurency matrix
type
Cooc
=
Matrix
Double
-------------------
-- | Phylomemy | --
-------------------
-- | Phylo datatype of a phylomemy
-- foundations : the foundations of the phylo
-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
-- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
-- param : the parameters of the phylomemy (with the user's configuration)
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_param
::
PhyloParam
}
deriving
(
Generic
,
Show
,
Eq
)
----------------
----------------
-- | Lenses | --
-- | Lenses | --
----------------
----------------
makeLenses
''
C
onfig
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFoundations
------------------------
------------------------
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
349ed2a2
...
@@ -20,6 +20,7 @@ Portability : POSIX
...
@@ -20,6 +20,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloExample
where
module
Gargantext.Viz.Phylo.PhyloExample
where
import
Data.List
(
sortOn
)
import
Data.List
(
sortOn
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
,
toLower
)
import
Data.Text
(
Text
,
toLower
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -27,6 +28,7 @@ import Gargantext.Text.Context (TermList)
...
@@ -27,6 +28,7 @@ import Gargantext.Text.Context (TermList)
import
Gargantext.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Control.Lens
import
Control.Lens
...
@@ -38,7 +40,19 @@ import qualified Data.Vector as Vector
...
@@ -38,7 +40,19 @@ import qualified Data.Vector as Vector
--------------------------------------------
--------------------------------------------
-- Next is to build the config and the phyloLevel 0
-- cooc et phyloBase
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
=
nbDocsByTime
docs
1
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
fisSupport
=
0
,
fisSize
=
0
}
docs
::
[
Document
]
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
349ed2a2
...
@@ -13,4 +13,24 @@ Portability : POSIX
...
@@ -13,4 +13,24 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.PhyloMaker
where
module
Gargantext.Viz.Phylo.PhyloMaker
where
\ No newline at end of file
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
--------------------
-- | to Phylo 0 | --
--------------------
nbDocsByTime
::
[
Document
]
->
Int
->
Map
Date
Double
nbDocsByTime
docs
step
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
step
in
unionWith
(
+
)
time
docs'
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
349ed2a2
...
@@ -19,6 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
...
@@ -19,6 +19,7 @@ module Gargantext.Viz.Phylo.PhyloTools where
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Maybe
(
Maybe
,
fromMaybe
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Data.List
(
sort
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
...
@@ -28,23 +29,6 @@ import GHC.IO (FilePath)
...
@@ -28,23 +29,6 @@ import GHC.IO (FilePath)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
----------------
-- | Config | --
----------------
-- | Define a default value
def
::
a
->
Maybe
a
->
a
def
=
fromMaybe
-- | To init a configuration
initConfig
::
Maybe
FilePath
->
Maybe
FilePath
->
Maybe
FilePath
->
Maybe
CorpusParser
->
Maybe
Int
->
Maybe
Text
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Int
->
Maybe
Bool
->
Config
initConfig
(
def
""
->
corpus
)
(
def
""
->
mapList
)
(
def
""
->
output
)
(
def
Csv
->
parser
)
(
def
10000
->
limit
)
(
def
"A phylomemy"
->
name
)
(
def
2
->
level
)
(
def
3
->
period
)
(
def
1
->
step
)
(
def
3
->
support
)
(
def
4
->
clique
)
(
def
3
->
minBranchSize
)
(
def
True
->
safe
)
=
Config
corpus
mapList
output
parser
limit
name
level
period
step
support
clique
minBranchSize
safe
---------------------
---------------------
-- | Foundations | --
-- | Foundations | --
---------------------
---------------------
...
@@ -52,4 +36,16 @@ initConfig (def "" -> corpus) (def "" -> mapList) (def "" -> output) (def Csv ->
...
@@ -52,4 +36,16 @@ initConfig (def "" -> corpus) (def "" -> mapList) (def "" -> output) (def Csv ->
-- | Is this Ngrams a Foundations Root ?
-- | Is this Ngrams a Foundations Root ?
isRoots
::
Ngrams
->
Vector
Ngrams
->
Bool
isRoots
::
Ngrams
->
Vector
Ngrams
->
Bool
isRoots
n
ns
=
Vector
.
elem
n
ns
isRoots
n
ns
=
Vector
.
elem
n
ns
\ No newline at end of file
--------------
-- | Time | --
--------------
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
let
dates'
=
sort
dates
in
[
head'
"toTimeScale"
dates'
,
((
head'
"toTimeScale"
dates'
)
+
step
)
..
last'
"toTimeScale"
dates'
]
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment