Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
c45062cb
Commit
c45062cb
authored
May 03, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CONVERT] convert file between formats (RIS -> CSV).
parent
33f1f510
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
113 additions
and
47 deletions
+113
-47
Convert.hs
src/Gargantext/Text/Convert.hs
+29
-0
Parsers.hs
src/Gargantext/Text/Parsers.hs
+3
-4
CSV.hs
src/Gargantext/Text/Parsers/CSV.hs
+1
-1
RIS.hs
src/Gargantext/Text/Parsers/RIS.hs
+4
-42
Presse.hs
src/Gargantext/Text/Parsers/RIS/Presse.hs
+76
-0
No files found.
src/Gargantext/Text/Convert.hs
0 → 100644
View file @
c45062cb
{-|
Module : Gargantext.Text.Convert
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Format Converter.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Convert
(
risPress2csv
)
where
import
System.FilePath
(
FilePath
())
-- , takeExtension)
import
Gargantext.Prelude
import
Gargantext.Text.Parsers.CSV
(
writeDocs2Csv
)
import
Gargantext.Text.Parsers
(
parseDocs
,
FileFormat
(
..
))
risPress2csv
::
FilePath
->
IO
()
risPress2csv
f
=
parseDocs
RisPresse
(
f
<>
".ris"
)
>>=
\
hs
->
writeDocs2Csv
(
f
<>
".csv"
)
hs
src/Gargantext/Text/Parsers.hs
View file @
c45062cb
...
@@ -51,7 +51,8 @@ import Gargantext.Core (Lang(..))
...
@@ -51,7 +51,8 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
import
Gargantext.Text.Parsers.RIS
(
risParser
,
presseParser
)
import
Gargantext.Text.Parsers.RIS
(
risParser
)
import
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Text.Parsers.Date
(
parseDate
)
import
Gargantext.Text.Parsers.Date
(
parseDate
)
import
Gargantext.Text.Parsers.CSV
(
parseHal
,
writeDocs2Csv
)
import
Gargantext.Text.Parsers.CSV
(
parseHal
,
writeDocs2Csv
)
import
Gargantext.Text.Terms.Stop
(
detectLang
)
import
Gargantext.Text.Terms.Stop
(
detectLang
)
...
@@ -85,7 +86,7 @@ data FileFormat = WOS | RIS | CsvHalFormat | RisPresse -- | CsvGargV3
...
@@ -85,7 +86,7 @@ data FileFormat = WOS | RIS | CsvHalFormat | RisPresse -- | CsvGargV3
-- TODO manage errors here
-- TODO manage errors here
parseDocs
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseDocs
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseDocs
CsvHalFormat
p
=
parseHal
p
parseDocs
CsvHalFormat
p
=
parseHal
p
parseDocs
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
presse
Parser
<$>
parse'
RIS
p
parseDocs
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
presse
Enrich
<$>
parse'
RIS
p
parseDocs
ff
path
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
parse
ff
path
parseDocs
ff
path
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
parse
ff
path
type
Year
=
Int
type
Year
=
Int
...
@@ -132,7 +133,6 @@ toDoc ff d = do
...
@@ -132,7 +133,6 @@ toDoc ff d = do
Nothing
Nothing
Nothing
Nothing
(
Just
$
(
DT
.
pack
.
show
)
lang
)
(
Just
$
(
DT
.
pack
.
show
)
lang
)
toDoc
_
_
=
undefined
parse
::
FileFormat
->
FilePath
->
IO
([
ParseError
],
[[(
Text
,
Text
)]])
parse
::
FileFormat
->
FilePath
->
IO
([
ParseError
],
[[(
Text
,
Text
)]])
parse
ff
fp
=
enrichWith
identity
<$>
parse'
ff
fp
parse
ff
fp
=
enrichWith
identity
<$>
parse'
ff
fp
...
@@ -184,4 +184,3 @@ clean txt = DT.map clean' txt
...
@@ -184,4 +184,3 @@ clean txt = DT.map clean' txt
risPress2csv
f
=
parseDocs
RisPresse
(
f
<>
".ris"
)
>>=
\
hs
->
writeDocs2Csv
(
f
<>
".csv"
)
hs
risPress2csv
f
=
parseDocs
RisPresse
(
f
<>
".ris"
)
>>=
\
hs
->
writeDocs2Csv
(
f
<>
".csv"
)
hs
src/Gargantext/Text/Parsers/CSV.hs
View file @
c45062cb
...
@@ -25,7 +25,7 @@ import Control.Applicative
...
@@ -25,7 +25,7 @@ import Control.Applicative
import
Data.Char
(
ord
)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
,
unpack
)
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
qualified
Data.ByteString.Lazy
as
BL
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Time.Segment
(
jour
)
import
Data.Time.Segment
(
jour
)
...
...
src/Gargantext/Text/Parsers/RIS.hs
View file @
c45062cb
...
@@ -19,7 +19,7 @@ citation programs to exchange data.
...
@@ -19,7 +19,7 @@ citation programs to exchange data.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.RIS
(
risParser
,
risDate
,
toDate
,
presseParser
)
where
module
Gargantext.Text.Parsers.RIS
(
risParser
,
withField
)
where
import
Data.Either
(
either
)
import
Data.Either
(
either
)
import
Data.List
(
lookup
)
import
Data.List
(
lookup
)
...
@@ -57,7 +57,7 @@ field = do
...
@@ -57,7 +57,7 @@ field = do
let
txts'
=
case
DL
.
length
txts
>
0
of
let
txts'
=
case
DL
.
length
txts
>
0
of
True
->
txts
True
->
txts
False
->
[]
False
->
[]
pure
(
translate
name
,
concat
([
txt
]
<>
txts'
))
pure
(
name
,
concat
([
txt
]
<>
txts'
))
lines
::
Parser
[
ByteString
]
lines
::
Parser
[
ByteString
]
lines
=
many
line
lines
=
many
line
...
@@ -65,46 +65,8 @@ lines = many line
...
@@ -65,46 +65,8 @@ lines = many line
line
::
Parser
ByteString
line
::
Parser
ByteString
line
=
"
\n\n
"
*>
takeTill
isEndOfLine
line
=
"
\n\n
"
*>
takeTill
isEndOfLine
translate
::
ByteString
->
ByteString
translate
champs
|
champs
==
"AU"
=
"authors"
|
champs
==
"TI"
=
"title"
|
champs
==
"JF"
=
"source"
|
champs
==
"LA"
=
"language"
|
champs
==
"DI"
=
"doi"
|
champs
==
"UR"
=
"url"
|
champs
==
"N2"
=
"abstract"
|
otherwise
=
champs
-------------------------------------------------------------
-------------------------------------------------------------
withField
::
ByteString
->
(
ByteString
->
[(
ByteString
,
ByteString
)])
presseParser
::
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
presseParser
=
(
toDate
"DA"
(
\
x
->
either
(
const
[]
)
identity
$
parseOnly
risDate
x
))
.
(
toDate
"LA"
presseLang
)
risDate
::
Parser
[(
ByteString
,
ByteString
)]
risDate
=
do
day
<-
take
2
<*
"/"
mon
<-
take
2
<*
"/"
yea
<-
take
4
pure
$
map
(
first
(
\
x
->
"publication_"
<>
x
))
[
(
"day"
,
day
)
,
(
"month"
,
mon
)
,
(
"year"
,
yea
)
,
(
"date"
,
yea
<>
"-"
<>
mon
<>
"-"
<>
day
<>
"T0:0:0"
)
]
toDate
::
ByteString
->
(
ByteString
->
[(
ByteString
,
ByteString
)])
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
toDate
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
withField
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
presseLang
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseLang
"Français"
=
[(
"language"
,
"FR"
)]
presseLang
"English"
=
[(
"langauge"
,
"EN"
)]
presseLang
_
=
undefined
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
where
ti = case
-}
src/Gargantext/Text/Parsers/RIS/Presse.hs
0 → 100644
View file @
c45062cb
{-|
Module : Gargantext.Text.Parsers.RIS.Presse
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Presse RIS format parser en enricher.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
where
import
Data.Either
(
either
)
import
Data.List
(
lookup
)
import
Data.Tuple.Extra
(
first
)
import
Control.Applicative
import
Data.Attoparsec.ByteString
(
Parser
,
try
,
string
,
takeTill
,
take
,
manyTill
,
many1
,
endOfInput
,
parseOnly
)
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
isEndOfLine
)
import
Data.ByteString
(
ByteString
,
concat
,
length
)
import
Data.ByteString.Char8
(
pack
)
import
Data.Monoid
((
<>
))
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
Gargantext.Text.Parsers.RIS
(
withField
)
import
qualified
Data.List
as
DL
-------------------------------------------------------------
-------------------------------------------------------------
presseEnrich
::
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
presseEnrich
=
(
withField
"DA"
presseDate
)
.
(
withField
"LA"
presseLang
)
.
(
map
(
first
presseFields
))
presseDate
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseDate
str
=
either
(
const
[]
)
identity
$
parseOnly
parseDate
str
parseDate
::
Parser
[(
ByteString
,
ByteString
)]
parseDate
=
do
day
<-
take
2
<*
"/"
mon
<-
take
2
<*
"/"
yea
<-
take
4
pure
$
map
(
first
(
\
x
->
"publication_"
<>
x
))
[
(
"day"
,
day
)
,
(
"month"
,
mon
)
,
(
"year"
,
yea
)
,
(
"date"
,
yea
<>
"-"
<>
mon
<>
"-"
<>
day
<>
"T0:0:0"
)
]
presseLang
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseLang
"Français"
=
[(
"language"
,
"FR"
)]
presseLang
"English"
=
[(
"language"
,
"EN"
)]
presseLang
_
=
undefined
presseFields
::
ByteString
->
ByteString
presseFields
champs
|
champs
==
"AU"
=
"authors"
|
champs
==
"TI"
=
"title"
|
champs
==
"JF"
=
"source"
|
champs
==
"DI"
=
"doi"
|
champs
==
"UR"
=
"url"
|
champs
==
"N2"
=
"abstract"
|
otherwise
=
champs
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
where
ti = case
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment