Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6cb3efe5
Commit
6cb3efe5
authored
Aug 24, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CsvDoc] implement Maybe for year/month/date
parent
5bee1178
Pipeline
#1739
passed with stage
in 36 minutes and 21 seconds
Changes
5
Pipelines
4
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
26 deletions
+48
-26
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-2
Main.hs
bin/gargantext-cli/Main.hs
+2
-2
List.hs
src/Gargantext/API/Ngrams/List.hs
+1
-1
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+35
-19
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+2
-2
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
6cb3efe5
...
@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
...
@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import
Data.Aeson
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
qualified
Prelude
as
Prelude
...
@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
...
@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right
r
->
Right
r
->
pure
$
Vector
.
toList
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
Nothing
[]
[]
...
...
bin/gargantext-cli/Main.hs
View file @
6cb3efe5
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
,
fromMIntOrDec
,
defaultYear
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
@@ -91,7 +91,7 @@ main = do
...
@@ -91,7 +91,7 @@ main = do
Right
cf
->
do
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
fromMIntOrDec
defaultYear
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
.
snd
$
cf
-- termListMap :: [Text]
-- termListMap :: [Text]
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
6cb3efe5
...
@@ -283,7 +283,7 @@ csvPost l m = do
...
@@ -283,7 +283,7 @@ csvPost l m = do
let
lst
=
readCsvText
m
let
lst
=
readCsvText
m
let
p
=
parseCsvData
lst
let
p
=
parseCsvData
lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] lst" lst
--
printDebug "[csvPost] p" p
printDebug
"[csvPost] p"
p
_
<-
setListNgrams
l
NgramsTerms
p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
pure
True
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
6cb3efe5
...
@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
...
@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import
Data.Char
(
ord
)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Csv
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
import
qualified
Data.Vector
as
V
...
@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
...
@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
mPy
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
(
fromMIntOrDec
defaultYear
mPy
)
(
fromMaybe
defaultMonth
pm
)
(
fromMaybe
defaultDay
pd
)
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
...
@@ -96,7 +99,7 @@ toDocs v = V.toList
...
@@ -96,7 +99,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
Just
$
IntOrDec
py
)
(
Just
pm
)
(
Just
pd
)
abst
auth
)
---------------------------------------------------------------
---------------------------------------------------------------
-- | Split a document in its context
-- | Split a document in its context
...
@@ -150,12 +153,21 @@ instance FromField IntOrDec where
...
@@ -150,12 +153,21 @@ instance FromField IntOrDec where
instance
ToField
IntOrDec
where
instance
ToField
IntOrDec
where
toField
(
IntOrDec
i
)
=
toField
i
toField
(
IntOrDec
i
)
=
toField
i
fromMIntOrDec
::
Int
->
Maybe
IntOrDec
->
Int
fromMIntOrDec
default
'
m
Val
=
unIntOrDec
$
fromMaybe
(
IntOrDec
default
')
mVal
defaultYear
::
Int
defaultYear
=
1973
defaultMonth
::
Int
defaultMonth
=
1
defaultDay
::
Int
defaultDay
=
1
data
CsvDoc
=
CsvDoc
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
IntOrDec
,
csv_publication_year
::
!
(
Maybe
IntOrDec
)
,
csv_publication_month
::
!
Int
,
csv_publication_month
::
!
(
Maybe
Int
)
,
csv_publication_day
::
!
Int
,
csv_publication_day
::
!
(
Maybe
Int
)
,
csv_abstract
::
!
Text
,
csv_abstract
::
!
Text
,
csv_authors
::
!
Text
,
csv_authors
::
!
Text
}
}
...
@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
...
@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
instance
ToNamedRecord
CsvDoc
where
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
namedRecord
[
"title"
.=
t
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"publication_day"
.=
pd
,
"abstract"
.=
abst
,
"abstract"
.=
abst
,
"authors"
.=
aut
,
"authors"
.=
aut
]
]
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
m
$
_hd_source
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
Just
$
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
Just
$
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_authors
h
)
(
m
$
_hd_authors
h
)
...
@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
...
@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
csv2doc
(
CsvDoc
title
source
(
IntOrDec
pub_year
)
pub_month
pub_d
ay
mPubYear
mPubMonth
mPubD
ay
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
Nothing
Nothing
...
@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
...
@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing
Nothing
(
Just
source
)
(
Just
source
)
(
Just
abstract
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
_year
)
pub_month
pub_d
ay
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
Year
)
pubMonth
pubD
ay
)
(
Just
$
fromIntegral
pub_y
ear
)
(
Just
pubY
ear
)
(
Just
pub
_m
onth
)
(
Just
pub
M
onth
)
(
Just
pub
_d
ay
)
(
Just
pub
D
ay
)
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
where
pubYear
=
fromMIntOrDec
defaultYear
mPubYear
pubMonth
=
fromMaybe
defaultMonth
mPubMonth
pubDay
=
fromMaybe
defaultDay
mPubDay
------------------------------------------------------------------------
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
6cb3efe5
...
@@ -28,7 +28,7 @@ import System.IO (FilePath)
...
@@ -28,7 +28,7 @@ import System.IO (FilePath)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
import
Data.Vector
(
fromList
)
import
Data.Vector
(
fromList
)
data
Patent
=
Patent
{
_patent_title
::
Text
data
Patent
=
Patent
{
_patent_title
::
Text
,
_patent_abstract
::
Text
,
_patent_abstract
::
Text
,
_patent_year
::
Text
,
_patent_year
::
Text
,
_patent_id
::
Text
,
_patent_id
::
Text
...
@@ -49,7 +49,7 @@ json2csv fin fout = do
...
@@ -49,7 +49,7 @@ json2csv fin fout = do
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
CsvDoc
title
"Source"
(
read
(
unpack
year
))
1
1
abstract
"Authors"
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment