Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
d388d621
Commit
d388d621
authored
May 05, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PARSERS] refactor, split, organize (TODO: tests parsers).
parent
64e0bc84
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
169 additions
and
146 deletions
+169
-146
Parsers.hs
src/Gargantext/Text/Parsers.hs
+11
-11
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+1
-81
Attoparsec.hs
src/Gargantext/Text/Parsers/Date/Attoparsec.hs
+39
-0
Parsec.hs
src/Gargantext/Text/Parsers/Date/Parsec.hs
+96
-0
RIS.hs
src/Gargantext/Text/Parsers/RIS.hs
+7
-10
Presse.hs
src/Gargantext/Text/Parsers/RIS/Presse.hs
+5
-25
WOS.hs
src/Gargantext/Text/Parsers/WOS.hs
+10
-19
No files found.
src/Gargantext/Text/Parsers.hs
View file @
d388d621
...
@@ -40,22 +40,20 @@ import Data.String()
...
@@ -40,22 +40,20 @@ import Data.String()
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Time
(
UTCTime
(
..
))
import
Data.Time
(
UTCTime
(
..
))
import
Data.Tuple.Extra
(
both
,
second
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString
as
DB
import
qualified
Data.Map
as
DM
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Data.Text
as
DT
import
qualified
Data.Time
as
DT
import
qualified
Data.Time
as
DT
------------------------------------------------------------------------
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
import
qualified
Gargantext.Text.Parsers.WOS
as
WOS
import
Gargantext.Text.Parsers.RIS
(
risParser
)
import
qualified
Gargantext.Text.Parsers.RIS
as
RIS
import
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Text.Parsers.Date
(
parseDate
)
import
Gargantext.Text.Parsers.Date
(
parseDate
)
import
Gargantext.Text.Parsers.CSV
(
parseHal
,
writeDocs2Csv
)
import
Gargantext.Text.Parsers.CSV
(
parseHal
)
import
Gargantext.Text.Terms.Stop
(
detectLang
)
import
Gargantext.Text.Terms.Stop
(
detectLang
)
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -70,7 +68,8 @@ type ParseError = String
...
@@ -70,7 +68,8 @@ type ParseError = String
-- | According to the format of Input file,
-- | According to the format of Input file,
-- different parser are available.
-- different parser are available.
data
FileFormat
=
WOS
|
RIS
|
CsvHalFormat
|
RisPresse
-- | CsvGargV3
data
FileFormat
=
WOS
|
RIS
|
RisPresse
|
CsvGargV3
|
CsvHalFormat
deriving
(
Show
)
deriving
(
Show
)
-- Implemented (ISI Format)
-- Implemented (ISI Format)
...
@@ -88,7 +87,8 @@ data FileFormat = WOS | RIS | CsvHalFormat | RisPresse -- | CsvGargV3
...
@@ -88,7 +87,8 @@ data FileFormat = WOS | RIS | CsvHalFormat | RisPresse -- | CsvGargV3
parseDocs
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseDocs
::
FileFormat
->
FilePath
->
IO
[
HyperdataDocument
]
parseDocs
CsvHalFormat
p
=
parseHal
p
parseDocs
CsvHalFormat
p
=
parseHal
p
parseDocs
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
presseEnrich
<$>
parse'
RIS
p
parseDocs
RisPresse
p
=
join
$
mapM
(
toDoc
RIS
)
<$>
snd
<$>
enrichWith
presseEnrich
<$>
parse'
RIS
p
parseDocs
ff
path
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
parse
ff
path
parseDocs
WOS
p
=
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
(
map
(
first
WOS
.
keys
))
<$>
parse'
WOS
p
parseDocs
ff
p
=
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
parse
ff
p
type
Year
=
Int
type
Year
=
Int
type
Month
=
Int
type
Month
=
Int
...
@@ -159,8 +159,8 @@ parse' format path = do
...
@@ -159,8 +159,8 @@ parse' format path = do
-- According to the format of the text, choose the right parser.
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
-- TODO withParser :: FileFormat -> Parser [Document]
withParser
::
FileFormat
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
::
FileFormat
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
WOS
=
wosP
arser
withParser
WOS
=
WOS
.
p
arser
withParser
RIS
=
risP
arser
withParser
RIS
=
RIS
.
p
arser
--withParser ODT = odtParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser XML = xmlParser
withParser
_
=
panic
"[ERROR] Parser not implemented yet"
withParser
_
=
panic
"[ERROR] Parser not implemented yet"
...
...
src/Gargantext/Text/Parsers/Date.hs
View file @
d388d621
...
@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
...
@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.Date
(
parseDate
,
parseDateRaw
,
parseGregorian
,
wrapDST
)
where
module
Gargantext.Text.Parsers.Date
(
parseDate
,
parseDateRaw
)
where
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.Text
(
Text
,
unpack
,
splitOn
,
pack
)
import
Data.Text
(
Text
,
unpack
,
splitOn
,
pack
)
...
@@ -37,25 +37,6 @@ import qualified Data.Aeson as Json
...
@@ -37,25 +37,6 @@ import qualified Data.Aeson as Json
import
qualified
Data.HashSet
as
HashSet
import
qualified
Data.HashSet
as
HashSet
import
qualified
Duckling.Core
as
DC
import
qualified
Duckling.Core
as
DC
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
import
Control.Monad
((
=<<
))
import
Data.Either
(
Either
)
import
Data.Fixed
(
Fixed
(
MkFixed
))
import
Data.Foldable
(
length
)
import
Data.String
(
String
)
import
Data.Time
(
ZonedTime
(
..
),
LocalTime
(
..
),
TimeZone
(
..
),
TimeOfDay
(
..
))
import
Data.Time.Calendar
(
Day
,
fromGregorian
)
import
Duckling.Debug
as
DB
import
Duckling.Engine
(
parseAndResolve
)
import
Duckling.Rules
(
rulesFor
)
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
import
Text.Parsec.Error
(
ParseError
)
import
Text.Parsec.Prim
(
Stream
,
ParsecT
)
import
Text.Parsec.String
(
Parser
)
import
Text.ParserCombinators.Parsec
(
many1
,
noneOf
,
anyChar
,
char
,
oneOf
)
import
Text.XML.HXT.DOM.Util
(
decimalStringToInt
)
import
qualified
Text.ParserCombinators.Parsec
(
parse
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Date Parser
-- | Date Parser
-- Parses dates mentions in full text given the language.
-- Parses dates mentions in full text given the language.
...
@@ -120,64 +101,3 @@ parseDateWithDuckling lang input = do
...
@@ -120,64 +101,3 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure
$
analyze
input
contxt
$
HashSet
.
fromList
[(
This
Time
)]
pure
$
analyze
input
contxt
$
HashSet
.
fromList
[(
This
Time
)]
-- | Permit to transform a String to an Int in a monadic context
wrapDST
::
Monad
m
=>
String
->
m
Int
wrapDST
=
return
.
decimalStringToInt
-- | Generic parser which take at least one element not given in argument
many1NoneOf
::
Stream
s
m
Char
=>
[
Char
]
->
ParsecT
s
u
m
[
Char
]
many1NoneOf
=
(
many1
.
noneOf
)
--getMultiplicator :: Int -> Int
getMultiplicator
a
|
0
>=
a
=
1
|
otherwise
=
10
*
(
getMultiplicator
$
div
a
10
)
-- | Parser for date format y-m-d
parseGregorian
::
Parser
Day
parseGregorian
=
do
y
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
m
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
d
<-
wrapDST
=<<
many1NoneOf
[
'T'
]
_
<-
char
'T'
return
$
fromGregorian
(
toInteger
y
)
m
d
---- | Parser for time format h:m:s
parseTimeOfDay
::
Parser
TimeOfDay
parseTimeOfDay
=
do
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
r
<-
many1NoneOf
[
'.'
]
_
<-
char
'.'
dec
<-
many1NoneOf
[
'+'
,
'-'
]
let
(
nb
,
l
)
=
(
decimalStringToInt
$
r
++
dec
,
length
dec
)
seconds
=
nb
*
10
^
(
12
-
l
)
return
$
TimeOfDay
h
m
(
MkFixed
.
toInteger
$
seconds
)
-- | Parser for timezone format +hh:mm
parseTimeZone
::
Parser
TimeZone
parseTimeZone
=
do
sign
<-
oneOf
[
'+'
,
'-'
]
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
(
many1
$
anyChar
)
let
timeInMinute
=
if
sign
==
'+'
then
h
*
60
+
m
else
-
h
*
60
-
m
in
return
$
TimeZone
timeInMinute
False
"CET"
---- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime
::
Parser
ZonedTime
parseZonedTime
=
do
d
<-
parseGregorian
tod
<-
parseTimeOfDay
tz
<-
parseTimeZone
return
$
ZonedTime
(
LocalTime
d
(
tod
))
tz
---- | Opposite of toRFC3339
fromRFC3339
::
Text
->
Either
ParseError
ZonedTime
fromRFC3339
t
=
Text
.
ParserCombinators
.
Parsec
.
parse
parseZonedTime
"ERROR: Couldn't parse zoned time."
input
where
input
=
unpack
t
src/Gargantext/Text/Parsers/Date/Attoparsec.hs
0 → 100644
View file @
d388d621
{-|
Module : Gargantext.Text.Parsers.Date.Attoparsec
Description : Attoparsec parser for day/month/year date fields
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.Date.Attoparsec
where
import
Control.Applicative
((
<*
))
import
Data.Attoparsec.ByteString
(
Parser
,
take
)
import
Data.ByteString
(
ByteString
)
import
Data.Monoid
((
<>
))
import
Data.Tuple.Extra
(
first
)
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
)
-------------------------------------------------------------
-- | Parse a date laid out as two bytes (day), a separator, two bytes (month),
-- the same separator, then four bytes (year).
--
-- The separator is supplied by the caller as a parser. The result is a list
-- of key/value pairs whose keys are all prefixed with @publication_@:
-- @day@, @month@, @year@ and a combined @date@ value built as
-- @year-month-dayT0:0:0@.
parserWith :: Parser ByteString -> Parser [(ByteString, ByteString)]
parserWith sep = do
  dd   <- take 2 <* sep
  mm   <- take 2 <* sep
  yyyy <- take 4
  let combined = yyyy <> "-" <> mm <> "-" <> dd <> "T0:0:0"
      fields   = [ ("day"  , dd)
                 , ("month", mm)
                 , ("year" , yyyy)
                 , ("date" , combined)
                 ]
  pure $ map (first ("publication_" <>)) fields
src/Gargantext/Text/Parsers/Date/Parsec.hs
0 → 100644
View file @
d388d621
{-|
Module : Gargantext.Text.Parsers.Date.Parsec
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.Date.Parsec
where
import
Control.Monad
((
=<<
))
import
Data.Either
(
Either
)
import
Data.Fixed
(
Fixed
(
MkFixed
))
import
Data.Foldable
(
length
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
import
Data.Time
(
ZonedTime
(
..
),
LocalTime
(
..
),
TimeZone
(
..
),
TimeOfDay
(
..
))
import
Data.Time.Calendar
(
Day
,
fromGregorian
)
import
Gargantext.Prelude
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
import
Text.Parsec.Error
(
ParseError
)
import
Text.Parsec.Prim
(
Stream
,
ParsecT
)
import
Text.Parsec.String
(
Parser
)
import
Text.ParserCombinators.Parsec
(
many1
,
noneOf
,
anyChar
,
char
,
oneOf
)
import
Text.XML.HXT.DOM.Util
(
decimalStringToInt
)
import
qualified
Text.ParserCombinators.Parsec
(
parse
)
-- | Convert a decimal digit string to an 'Int' and lift the result into any
-- monad, so that it can be chained after a parser with '=<<'.
-- The conversion itself is delegated to 'decimalStringToInt'.
wrapDST :: Monad m => String -> m Int
wrapDST digits = return (decimalStringToInt digits)
-- | Generic parser that consumes one or more consecutive characters, none of
-- which belongs to the given exclusion list.
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
many1NoneOf excluded = many1 (noneOf excluded)
-- | Return 1 for any argument that is zero or negative; otherwise multiply
-- by 10 once for every integer division by 10 needed to bring the argument
-- down to 0 (for example 7 gives 10 and 42 gives 100).
getMultiplicator :: Int -> Int
getMultiplicator n
  | 0 >= n    = 1
  | otherwise = 10 * getMultiplicator (n `div` 10)
-- | Parser for a calendar date written as @y-m-d@ and terminated by @T@.
--
-- Year and month are each read up to a '-' delimiter, the day up to the 'T'
-- that introduces the time part; every delimiter is consumed. The three
-- numbers are then handed to 'fromGregorian'.
parseGregorian :: Parser Day
parseGregorian = do
  year  <- numberUpTo '-'
  month <- numberUpTo '-'
  day   <- numberUpTo 'T'
  return (fromGregorian (toInteger year) month day)
  where
    -- Read at least one character before the delimiter, drop the delimiter,
    -- and convert what was read to an Int.
    numberUpTo :: Char -> Parser Int
    numberUpTo delimiter = do
      digits <- many1NoneOf [delimiter]
      _      <- char delimiter
      wrapDST digits
-- | Parser for a time of day written as @h:m:s.fraction@.
--
-- Hours and minutes are read up to their ':' delimiters. The whole seconds
-- are read up to a mandatory '.', and the fractional digits up to (but not
-- including) the '+' or '-' that starts the time zone offset.
--
-- The seconds are assembled as an integer scaled to 12 decimal places and
-- wrapped with 'MkFixed'. Fractions longer than 12 digits are truncated to
-- that resolution; previously they made @10 ^ (12 - l)@ fail at runtime with
-- a negative exponent.
parseTimeOfDay :: Parser TimeOfDay
parseTimeOfDay = do
  h   <- wrapDST =<< many1NoneOf [':']
  _   <- char ':'
  m   <- wrapDST =<< many1NoneOf [':']
  _   <- char ':'
  r   <- many1NoneOf ['.']
  _   <- char '.'
  dec <- many1NoneOf ['+', '-']
  let -- Whole and fractional digits read as a single integer.
      nb = decimalStringToInt $ r ++ dec
      -- Number of fractional digits actually present in the input.
      l  = length dec
      -- Rescale so that exactly 12 fractional digits are represented.
      seconds
        | l >= 13   = nb `div` (10 ^ (l - 12))
        | otherwise = nb * 10 ^ (12 - l)
  return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
-- | Parser for a time zone offset written as @+hh:mm@ or @-hh:mm@.
--
-- The hours are read up to the ':' delimiter; the minutes part then consumes
-- every remaining character of the input. The offset is expressed in minutes
-- and negated when the sign is not '+'.
--
-- NOTE(review): the resulting 'TimeZone' is always built with the flag
-- @False@ and the name "CET", whatever offset was parsed — confirm this is
-- intended.
parseTimeZone :: Parser TimeZone
parseTimeZone = do
  sign    <- oneOf ['+', '-']
  hours   <- wrapDST =<< many1NoneOf [':']
  _       <- char ':'
  minutes <- wrapDST =<< many1 anyChar
  let magnitude = hours * 60 + minutes
      offset    = if sign == '+' then magnitude else 0 - magnitude
  return (TimeZone offset False "CET")
-- | Parser for a full zoned timestamp: runs 'parseGregorian',
-- 'parseTimeOfDay' and 'parseTimeZone' in sequence and assembles their
-- results into a 'ZonedTime'.
parseZonedTime :: Parser ZonedTime
parseZonedTime = do
  day       <- parseGregorian
  timeOfDay <- parseTimeOfDay
  zone      <- parseTimeZone
  let localTime = LocalTime day timeOfDay
  return (ZonedTime localTime zone)
-- | Opposite of toRFC3339: parse a 'Text' timestamp into a 'ZonedTime'.
--
-- The text is unpacked to a 'String' and run through 'parseZonedTime'.
-- The result is 'Left' with the Parsec 'ParseError' when parsing fails.
-- The string passed alongside the parser is the source name Parsec attaches
-- to its error messages.
fromRFC3339 :: Text -> Either ParseError ZonedTime
fromRFC3339 t =
  Text.ParserCombinators.Parsec.parse parseZonedTime sourceName (unpack t)
  where
    sourceName :: String
    sourceName = "ERROR: Couldn't parse zoned time."
src/Gargantext/Text/Parsers/RIS.hs
View file @
d388d621
...
@@ -19,23 +19,20 @@ citation programs to exchange data.
...
@@ -19,23 +19,20 @@ citation programs to exchange data.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.RIS
(
risP
arser
,
withField
,
fieldWith
,
lines
)
where
module
Gargantext.Text.Parsers.RIS
(
p
arser
,
withField
,
fieldWith
,
lines
)
where
import
Data.Either
(
either
)
import
Data.List
(
lookup
)
import
Data.List
(
lookup
)
import
Data.Tuple.Extra
(
first
)
import
Control.Applicative
import
Control.Applicative
import
Data.Attoparsec.ByteString
(
Parser
,
try
,
string
,
takeTill
,
take
,
manyTill
,
many1
,
endOfInput
,
parseOnly
)
import
Data.Attoparsec.ByteString
(
Parser
,
try
,
takeTill
,
take
,
many1
)
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
isEndOfLine
)
import
Data.Attoparsec.ByteString.Char8
(
isEndOfLine
)
import
Data.ByteString
(
ByteString
,
concat
,
length
)
import
Data.ByteString
(
ByteString
,
concat
)
import
Data.ByteString.Char8
(
pack
)
import
Data.Monoid
((
<>
))
import
Data.Monoid
((
<>
))
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
)
import
qualified
Data.List
as
DL
import
qualified
Data.List
as
DL
-------------------------------------------------------------
-------------------------------------------------------------
risP
arser
::
Parser
[[(
ByteString
,
ByteString
)]]
p
arser
::
Parser
[[(
ByteString
,
ByteString
)]]
risP
arser
=
do
p
arser
=
do
n
<-
notice
"TY -"
n
<-
notice
"TY -"
ns
<-
many1
(
notice
"
\n
TY -"
)
ns
<-
many1
(
notice
"
\n
TY -"
)
pure
$
[
n
]
<>
ns
pure
$
[
n
]
<>
ns
...
...
src/Gargantext/Text/Parsers/RIS/Presse.hs
View file @
d388d621
...
@@ -17,19 +17,13 @@ Presse RIS format parser en enricher.
...
@@ -17,19 +17,13 @@ Presse RIS format parser en enricher.
module
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
where
module
Gargantext.Text.Parsers.RIS.Presse
(
presseEnrich
)
where
import
Data.Either
(
either
)
import
Data.Either
(
either
)
import
Data.List
(
lookup
)
import
Data.Tuple.Extra
(
first
)
import
Data.Tuple.Extra
(
first
)
import
Control.Applicative
import
Data.Attoparsec.ByteString
(
parseOnly
)
import
Data.Attoparsec.ByteString
(
Parser
,
try
,
string
,
takeTill
,
take
,
manyTill
,
many1
,
endOfInput
,
parseOnly
)
import
Data.ByteString
(
ByteString
)
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
isEndOfLine
)
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
)
import
Data.ByteString
(
ByteString
,
concat
,
length
)
import
Data.ByteString.Char8
(
pack
)
import
Data.Monoid
((
<>
))
import
Gargantext.Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
Gargantext.Text.Parsers.RIS
(
withField
)
import
Gargantext.Text.Parsers.RIS
(
withField
)
import
qualified
Data.List
as
DL
import
qualified
Gargantext.Text.Parsers.Date.Attoparsec
as
Date
-------------------------------------------------------------
-------------------------------------------------------------
-------------------------------------------------------------
-------------------------------------------------------------
presseEnrich
::
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
presseEnrich
::
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
presseEnrich
=
(
withField
"DA"
presseDate
)
presseEnrich
=
(
withField
"DA"
presseDate
)
...
@@ -37,26 +31,13 @@ presseEnrich = (withField "DA" presseDate)
...
@@ -37,26 +31,13 @@ presseEnrich = (withField "DA" presseDate)
.
(
map
(
first
presseFields
))
.
(
map
(
first
presseFields
))
presseDate
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseDate
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseDate
str
=
either
(
const
[]
)
identity
$
parseOnly
parseDate
str
presseDate
str
=
either
(
const
[]
)
identity
$
parseOnly
(
Date
.
parserWith
"/"
)
str
parseDate
::
Parser
[(
ByteString
,
ByteString
)]
parseDate
=
do
day
<-
take
2
<*
"/"
mon
<-
take
2
<*
"/"
yea
<-
take
4
pure
$
map
(
first
(
\
x
->
"publication_"
<>
x
))
[
(
"day"
,
day
)
,
(
"month"
,
mon
)
,
(
"year"
,
yea
)
,
(
"date"
,
yea
<>
"-"
<>
mon
<>
"-"
<>
day
<>
"T0:0:0"
)
]
presseLang
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseLang
::
ByteString
->
[(
ByteString
,
ByteString
)]
presseLang
"Français"
=
[(
"language"
,
"FR"
)]
presseLang
"Français"
=
[(
"language"
,
"FR"
)]
presseLang
"English"
=
[(
"language"
,
"EN"
)]
presseLang
"English"
=
[(
"language"
,
"EN"
)]
presseLang
x
=
[(
"language"
,
x
)]
presseLang
x
=
[(
"language"
,
x
)]
presseFields
::
ByteString
->
ByteString
presseFields
::
ByteString
->
ByteString
presseFields
champs
presseFields
champs
|
champs
==
"AU"
=
"authors"
|
champs
==
"AU"
=
"authors"
...
@@ -67,7 +48,6 @@ presseFields champs
...
@@ -67,7 +48,6 @@ presseFields champs
|
champs
==
"N2"
=
"abstract"
|
champs
==
"N2"
=
"abstract"
|
otherwise
=
champs
|
otherwise
=
champs
{-
{-
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle :: [(ByteString, ByteString)] -> [(ByteString, ByteString)]
fixTitle ns = ns <> [ti, ab]
fixTitle ns = ns <> [ti, ab]
...
...
src/Gargantext/Text/Parsers/WOS.hs
View file @
d388d621
...
@@ -14,28 +14,21 @@ commentary with @some markup@.
...
@@ -14,28 +14,21 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.WOS
(
wosParser
)
where
module
Gargantext.Text.Parsers.WOS
(
parser
,
keys
)
where
-- TOFIX : Should import Gargantext.Prelude here
import
Control.Applicative
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
Data.Attoparsec.ByteString
(
Parser
,
string
,
takeTill
,
take
,
manyTill
,
many1
)
import
qualified
Data.List
as
DL
import
Data.Monoid
((
<>
))
import
Data.Attoparsec.ByteString
(
Parser
,
try
,
string
,
takeTill
,
take
,
manyTill
,
many1
)
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
isEndOfLine
)
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
isEndOfLine
)
import
Data.ByteString
(
ByteString
,
concat
)
import
Data.ByteString
(
ByteString
)
import
Data.ByteString.Char8
(
pack
)
import
Data.ByteString.Char8
(
pack
)
import
Control.Applicative
import
Gargantext.Text.Parsers.RIS
(
fieldWith
)
import
Gargantext.Text.Parsers.RIS
(
fieldWith
,
lines
)
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
-------------------------------------------------------------
-------------------------------------------------------------
-- | wosParser parses ISI format from
-- | wosParser parses ISI format from
-- Web Of Science Database
-- Web Of Science Database
wosP
arser
::
Parser
[[(
ByteString
,
ByteString
)]]
p
arser
::
Parser
[[(
ByteString
,
ByteString
)]]
wosP
arser
=
do
p
arser
=
do
-- TODO Warning if version /= 1.0
-- TODO Warning if version /= 1.0
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
-- FIXME anyChar (string ..) /= exact string "\nVR 1.0" ?
_
<-
manyTill
anyChar
(
string
$
pack
"
\n
VR 1.0"
)
_
<-
manyTill
anyChar
(
string
$
pack
"
\n
VR 1.0"
)
...
@@ -55,8 +48,8 @@ notice = start *> many (fieldWith field) <* end
...
@@ -55,8 +48,8 @@ notice = start *> many (fieldWith field) <* end
end
=
manyTill
anyChar
(
string
$
pack
"
\n
ER
\n
"
)
end
=
manyTill
anyChar
(
string
$
pack
"
\n
ER
\n
"
)
translate
::
ByteString
->
ByteString
keys
::
ByteString
->
ByteString
translate
champs
keys
champs
|
champs
==
"AF"
=
"authors"
|
champs
==
"AF"
=
"authors"
|
champs
==
"TI"
=
"title"
|
champs
==
"TI"
=
"title"
|
champs
==
"SO"
=
"source"
|
champs
==
"SO"
=
"source"
...
@@ -64,5 +57,3 @@ translate champs
...
@@ -64,5 +57,3 @@ translate champs
|
champs
==
"PD"
=
"publication_date"
|
champs
==
"PD"
=
"publication_date"
|
champs
==
"AB"
=
"abstract"
|
champs
==
"AB"
=
"abstract"
|
otherwise
=
champs
|
otherwise
=
champs
-------------------------------------------------------------
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment