Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6cb3efe5
Commit
6cb3efe5
authored
Aug 24, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CsvDoc] implement Maybe for year/month/date
parent
5bee1178
Pipeline
#1739
passed with stage
in 36 minutes and 21 seconds
Changes
5
Pipelines
4
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
26 deletions
+48
-26
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-2
Main.hs
bin/gargantext-cli/Main.hs
+2
-2
List.hs
src/Gargantext/API/Ngrams/List.hs
+1
-1
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+35
-19
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+2
-2
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
6cb3efe5
...
...
@@ -21,6 +21,7 @@ import Crypto.Hash.SHA256 (hash)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
...
...
@@ -152,8 +153,13 @@ csvToDocs parser patterns time path =
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
...
...
bin/gargantext-cli/Main.hs
View file @
6cb3efe5
...
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
,
fromMIntOrDec
,
defaultYear
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
...
@@ -91,7 +91,7 @@ main = do
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
fromMIntOrDec
defaultYear
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
-- termListMap :: [Text]
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
6cb3efe5
...
...
@@ -283,7 +283,7 @@ csvPost l m = do
let
lst
=
readCsvText
m
let
p
=
parseCsvData
lst
--printDebug "[csvPost] lst" lst
--
printDebug "[csvPost] p" p
printDebug
"[csvPost] p"
p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
6cb3efe5
...
...
@@ -20,6 +20,7 @@ import qualified Data.ByteString.Lazy as BL
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
(
fromMaybe
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Time.Segment
(
jour
)
import
qualified
Data.Vector
as
V
...
...
@@ -85,8 +86,10 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
mPy
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
(
fromMIntOrDec
defaultYear
mPy
)
(
fromMaybe
defaultMonth
pm
)
(
fromMaybe
defaultDay
pd
)
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
...
...
@@ -96,7 +99,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
Just
$
IntOrDec
py
)
(
Just
pm
)
(
Just
pd
)
abst
auth
)
---------------------------------------------------------------
-- | Split a document in its context
...
...
@@ -150,12 +153,21 @@ instance FromField IntOrDec where
instance
ToField
IntOrDec
where
toField
(
IntOrDec
i
)
=
toField
i
fromMIntOrDec
::
Int
->
Maybe
IntOrDec
->
Int
fromMIntOrDec
default
'
m
Val
=
unIntOrDec
$
fromMaybe
(
IntOrDec
default
')
mVal
defaultYear
::
Int
defaultYear
=
1973
defaultMonth
::
Int
defaultMonth
=
1
defaultDay
::
Int
defaultDay
=
1
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
IntOrDec
,
csv_publication_month
::
!
Int
,
csv_publication_day
::
!
Int
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
(
Maybe
IntOrDec
)
,
csv_publication_month
::
!
(
Maybe
Int
)
,
csv_publication_day
::
!
(
Maybe
Int
)
,
csv_abstract
::
!
Text
,
csv_authors
::
!
Text
}
...
...
@@ -172,21 +184,21 @@ instance FromNamedRecord CsvDoc where
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
namedRecord
[
"title"
.=
t
,
"source"
.=
s
namedRecord
[
"title"
.=
t
,
"source"
.=
s
,
"publication_year"
.=
py
,
"publication_month"
.=
pm
,
"publication_day"
.=
pd
,
"abstract"
.=
abst
,
"authors"
.=
aut
]
]
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
Just
$
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
Just
$
mI
$
_hd_publication_month
h
)
(
Just
$
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_authors
h
)
...
...
@@ -368,7 +380,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
(
IntOrDec
pub_year
)
pub_month
pub_d
ay
mPubYear
mPubMonth
mPubD
ay
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
...
...
@@ -380,14 +392,18 @@ csv2doc (CsvDoc title source
Nothing
(
Just
source
)
(
Just
abstract
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
_year
)
pub_month
pub_d
ay
)
(
Just
$
fromIntegral
pub_y
ear
)
(
Just
pub
_m
onth
)
(
Just
pub
_d
ay
)
(
Just
$
pack
.
show
$
jour
(
fromIntegral
pub
Year
)
pubMonth
pubD
ay
)
(
Just
pubY
ear
)
(
Just
pub
M
onth
)
(
Just
pub
D
ay
)
Nothing
Nothing
Nothing
Nothing
where
pubYear
=
fromMIntOrDec
defaultYear
mPubYear
pubMonth
=
fromMaybe
defaultMonth
mPubMonth
pubDay
=
fromMaybe
defaultDay
mPubDay
------------------------------------------------------------------------
parseHal
::
FilePath
->
IO
(
Either
Prelude
.
String
[
HyperdataDocument
])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
6cb3efe5
...
...
@@ -28,7 +28,7 @@ import System.IO (FilePath)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
CsvDoc
(
..
),
writeFile
,
headerCsvGargV3
)
import
Data.Vector
(
fromList
)
data
Patent
=
Patent
{
_patent_title
::
Text
data
Patent
=
Patent
{
_patent_title
::
Text
,
_patent_abstract
::
Text
,
_patent_year
::
Text
,
_patent_id
::
Text
...
...
@@ -49,7 +49,7 @@ json2csv fin fout = do
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
CsvDoc
title
"Source"
(
read
(
unpack
year
))
1
1
abstract
"Authors"
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment