Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
09e9fa50
Commit
09e9fa50
authored
Aug 05, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[csv] more work on making the parser accept looser input
parent
d63df339
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
17 deletions
+28
-17
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+3
-3
Main.hs
bin/gargantext-cli/Main.hs
+2
-2
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+23
-12
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
09e9fa50
...
@@ -32,7 +32,6 @@ import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, d
...
@@ -32,7 +32,6 @@ import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, d
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -40,6 +39,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
...
@@ -40,6 +39,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
@@ -152,8 +152,8 @@ csvToDocs parser patterns time path =
...
@@ -152,8 +152,8 @@ csvToDocs parser patterns time path =
Right
r
->
Right
r
->
pure
$
Vector
.
toList
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
toPhyloDate'
(
Csv
.
unIntOrDec
$
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
Nothing
[]
[]
...
...
bin/gargantext-cli/Main.hs
View file @
09e9fa50
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
...
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Terms.WithList
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
readFile
,
csv_title
,
csv_abstract
,
csv_publication_year
,
unIntOrDec
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Terms
(
terms
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
import
Gargantext.Core.Text.Metrics.Count
(
coocOnContexts
,
Coocs
)
...
@@ -91,7 +91,7 @@ main = do
...
@@ -91,7 +91,7 @@ main = do
Right
cf
->
do
Right
cf
->
do
let
corpus
=
DM
.
fromListWith
(
<>
)
let
corpus
=
DM
.
fromListWith
(
<>
)
.
DV
.
toList
.
DV
.
toList
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
DV
.
map
(
\
n
->
(
unIntOrDec
$
csv_publication_year
n
,
[(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)]))
.
snd
$
cf
.
snd
$
cf
-- termListMap :: [Text]
-- termListMap :: [Text]
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
09e9fa50
...
@@ -85,7 +85,7 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
...
@@ -85,7 +85,7 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
-- | Types Conversions
-- | Types Conversions
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
::
Vector
CsvDoc
->
[
CsvGargV3
]
toDocs
v
=
V
.
toList
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
->
CsvGargV3
nId
t
s
py
pm
pd
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
where
...
@@ -96,7 +96,7 @@ toDocs v = V.toList
...
@@ -96,7 +96,7 @@ toDocs v = V.toList
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
::
Vector
CsvGargV3
->
Vector
CsvDoc
fromDocs
docs
=
V
.
map
fromDocs'
docs
fromDocs
docs
=
V
.
map
fromDocs'
docs
where
where
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
fromDocs'
(
CsvGargV3
_
t
s
py
pm
pd
abst
auth
)
=
(
CsvDoc
t
s
(
IntOrDec
py
)
pm
pd
abst
auth
)
---------------------------------------------------------------
---------------------------------------------------------------
-- | Split a document in its context
-- | Split a document in its context
...
@@ -139,10 +139,21 @@ docsSize csvDoc = mean ls
...
@@ -139,10 +139,21 @@ docsSize csvDoc = mean ls
---------------------------------------------------------------
---------------------------------------------------------------
-- | An 'Int' wrapper used for CSV columns whose value may be written either
-- as an integer or as a decimal number.  The lenient decoding itself lives in
-- the 'FromField' instance for this type.
newtype IntOrDec = IntOrDec Int
  deriving (Eq, Read, Show)
-- | Unwrap an 'IntOrDec', returning the 'Int' it carries.
unIntOrDec :: IntOrDec -> Int
unIntOrDec wrapped =
  case wrapped of
    IntOrDec n -> n
-- | Decode a CSV field as an 'IntOrDec'.
--
-- The field is first read as a plain 'Int'.  If that fails, it is read as a
-- 'Double' and rounded down with 'Prelude.floor'.  When neither reading
-- succeeds, the failure reported is the one from the 'Double' parser.
instance FromField IntOrDec where
  parseField s = asInt <|> asFlooredDouble
    where
      -- Preferred reading: the field is already an integer.
      asInt = IntOrDec <$> (parseField s :: Parser Int)
      -- Fallback reading: accept a decimal and keep its floor.
      asFlooredDouble =
        (\d -> IntOrDec (Prelude.floor d)) <$> (parseField s :: Parser Double)
-- | Encode an 'IntOrDec' exactly as its underlying 'Int' would be encoded.
instance ToField IntOrDec where
  toField wrapped = toField (unIntOrDec wrapped)
data
CsvDoc
=
CsvDoc
data
CsvDoc
=
CsvDoc
{
csv_title
::
!
Text
{
csv_title
::
!
Text
,
csv_source
::
!
Text
,
csv_source
::
!
Text
,
csv_publication_year
::
!
Int
,
csv_publication_year
::
!
IntOrDec
,
csv_publication_month
::
!
Int
,
csv_publication_month
::
!
Int
,
csv_publication_day
::
!
Int
,
csv_publication_day
::
!
Int
,
csv_abstract
::
!
Text
,
csv_abstract
::
!
Text
...
@@ -151,13 +162,13 @@ data CsvDoc = CsvDoc
...
@@ -151,13 +162,13 @@ data CsvDoc = CsvDoc
deriving
(
Show
)
deriving
(
Show
)
instance
FromNamedRecord
CsvDoc
where
instance
FromNamedRecord
CsvDoc
where
parseNamedRecord
r
=
CsvDoc
<$>
r
.:
"title"
parseNamedRecord
r
=
CsvDoc
<$>
(
r
.:
"title"
<|>
r
.:
"Title"
)
<*>
r
.:
"source"
<*>
(
r
.:
"source"
<|>
r
.:
"Source"
)
<*>
r
.:
"publication_year"
<*>
(
r
.:
"publication_year"
<|>
r
.:
"Publication Year"
)
<*>
r
.:
"publication_month"
<*>
(
r
.:
"publication_month"
<|>
r
.:
"Publication Month"
)
<*>
r
.:
"publication_day"
<*>
(
r
.:
"publication_day"
<|>
r
.:
"Publication Day"
)
<*>
r
.:
"abstract"
<*>
(
r
.:
"abstract"
<|>
r
.:
"Abstract"
)
<*>
r
.:
"authors"
<*>
(
r
.:
"authors"
<|>
r
.:
"Authors"
)
instance
ToNamedRecord
CsvDoc
where
instance
ToNamedRecord
CsvDoc
where
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
toNamedRecord
(
CsvDoc
t
s
py
pm
pd
abst
aut
)
=
...
@@ -173,7 +184,7 @@ instance ToNamedRecord CsvDoc where
...
@@ -173,7 +184,7 @@ instance ToNamedRecord CsvDoc where
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
::
HyperdataDocument
->
CsvDoc
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
hyperdataDocument2csvDoc
h
=
CsvDoc
(
m
$
_hd_title
h
)
(
m
$
_hd_source
h
)
(
m
$
_hd_source
h
)
(
mI
$
_hd_publication_year
h
)
(
IntOrDec
$
mI
$
_hd_publication_year
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_month
h
)
(
mI
$
_hd_publication_day
h
)
(
mI
$
_hd_publication_day
h
)
(
m
$
_hd_abstract
h
)
(
m
$
_hd_abstract
h
)
...
@@ -357,7 +368,7 @@ csvHal2doc (CsvHal title source
...
@@ -357,7 +368,7 @@ csvHal2doc (CsvHal title source
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
::
CsvDoc
->
HyperdataDocument
csv2doc
(
CsvDoc
title
source
csv2doc
(
CsvDoc
title
source
pub_year
pub_month
pub_day
(
IntOrDec
pub_year
)
pub_month
pub_day
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
abstract
authors
)
=
HyperdataDocument
(
Just
"CsvHal"
)
Nothing
Nothing
Nothing
Nothing
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment