Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f40b051d
Commit
f40b051d
authored
Oct 15, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DATE] parser -> UTCTime
parent
18ceac9c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
126 additions
and
124 deletions
+126
-124
package.yaml
package.yaml
+8
-7
Core.hs
src/Gargantext/Core.hs
+5
-1
Parsers.hs
src/Gargantext/Text/Parsers.hs
+10
-11
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+102
-104
Wikimedia.hs
src/Gargantext/Text/Parsers/Wikimedia.hs
+1
-1
No files found.
package.yaml
View file @
f40b051d
...
...
@@ -169,12 +169,12 @@ executables:
main
:
Main.hs
source-dirs
:
bin/gargantext-server
ghc-options
:
-
-threaded
-
-rtsopts
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
-
-Wcompat
-
-Wmissing-signatures
-
-rtsopts
-
-threaded
-
-with-rtsopts=-N
dependencies
:
-
base
-
containers
...
...
@@ -232,11 +232,12 @@ tests:
main
:
Main.hs
source-dirs
:
src-doctest
ghc-options
:
-
-Werror
-
-threaded
-
-O2
-
-Wcompat
-
-Wmissing-signatures
-
-rtsopts
-
-threaded
-
-with-rtsopts=-N
-
-Wmissing-signatures
dependencies
:
-
doctest
-
Glob
...
...
src/Gargantext/Core.hs
View file @
f40b051d
...
...
@@ -25,8 +25,12 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data
Lang
=
EN
|
FR
-- | DE | SP | CH
-- | All languages supported
-- TODO : DE | SP | CH
data
Lang
=
EN
|
FR
deriving
(
Show
,
Eq
,
Ord
,
Bounded
,
Enum
)
allLangs
::
[
Lang
]
allLangs
=
[
minBound
..
]
src/Gargantext/Text/Parsers.hs
View file @
f40b051d
...
...
@@ -19,12 +19,13 @@ please follow the types.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
module
Gargantext.Text.Parsers
-- (parse, FileFormat(..)
)
module
Gargantext.Text.Parsers
(
parse
,
FileFormat
(
..
),
clean
)
where
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
"zip"
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
Data.Either.Extra
(
partitionEithers
)
import
Data.List
(
concat
)
...
...
@@ -49,15 +50,13 @@ import Gargantext.Prelude
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
------------------------------------------------------------------------
type
ParseError
=
String
type
Field
=
Text
type
Document
=
DM
.
Map
Field
Text
type
FilesParsed
=
DM
.
Map
FilePath
FileParsed
data
FileParsed
=
FileParsed
{
_fileParsed_errors
::
Maybe
ParseError
,
_fileParsed_result
::
[
Document
]
}
deriving
(
Show
)
--type Field = Text
--type Document = DM.Map Field Text
--type FilesParsed = DM.Map FilePath FileParsed
--data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
-- , _fileParsed_result :: [Document]
-- } deriving (Show)
-- | According to the format of Input file,
...
...
@@ -85,7 +84,7 @@ parse format path = do
-- | withParser:
-- According t
he format of the text, choosing
the right parser.
-- According t
o the format of the text, choose
the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser
::
FileFormat
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
WOS
=
wosParser
...
...
src/Gargantext/Text/Parsers/Date.hs
View file @
f40b051d
...
...
@@ -7,61 +7,66 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate
1
returns date as Text:
According to the language of the text, parseDate
Raw
returns date as Text:
TODO : Add some tests
import Gargantext.Parsers.Date as DGP
DGP.parseDate
1
DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
import Gargantext.
Text.
Parsers.Date as DGP
DGP.parseDate
Raw
DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleContexts
#-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.Date
(
parseDate1
,
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Prelude
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
--import Gargantext.Types.Main as G
module
Gargantext.Text.Parsers.Date
(
parseDate
,
parseDateRaw
)
where
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.Text
(
Text
,
unpack
,
splitOn
)
import
Data.Time
(
parseTimeOrError
,
defaultTimeLocale
)
import
Data.Time.Clock
(
UTCTime
,
getCurrentTime
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Data.Time.LocalTime
(
utc
)
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
)
,
DucklingTime
(
DucklingTime
)
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Duckling.Api
(
analyze
)
import
Duckling.Core
(
makeLocale
,
Some
(
This
),
Dimension
(
Time
))
import
qualified
Duckling.Core
as
DC
import
Duckling.Types
(
jsonValue
,
Entity
)
import
Duckling.Api
(
analyze
,
parse
)
import
qualified
Data.HashSet
as
HashSet
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
),
DucklingTime
(
DucklingTime
))
import
Duckling.Types
(
ResolvedToken
)
import
Duckling.Types
(
jsonValue
)
import
Gargantext.Core
(
Lang
(
FR
,
EN
)
)
import
Gargantext.Prelude
import
qualified
Data.Aeson
as
Json
import
Data.Time
(
ZonedTime
(
..
),
LocalTime
(
..
),
TimeZone
(
..
),
TimeOfDay
(
..
))
import
Data.Time.Calendar
(
Day
,
fromGregorian
)
import
Data.Fixed
(
Fixed
(
MkFixed
))
import
Data.Foldable
(
length
)
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
qualified
Data.HashSet
as
HashSet
import
qualified
Duckling.Core
as
DC
import
Control.Monad
((
=<<
))
import
Data.Either
(
Either
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
-- import Control.Monad ((=<<))
-- import Data.Either (Either)
-- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
-- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)
------------------------------------------------------------------------
parseDate
::
Lang
->
Text
->
IO
UTCTime
parseDate
lang
s
=
do
dateStr'
<-
parseDateRaw
lang
s
let
format
=
"%Y-%m-%dT%T"
let
dateStr
=
unpack
$
maybe
"0-0-0T0:0:0"
identity
$
head
$
splitOn
"."
dateStr'
pure
$
parseTimeOrError
True
defaultTimeLocale
format
dateStr
import
Duckling.Types
(
ResolvedToken
)
import
Safe
(
headMay
)
import
Text.Parsec.Error
(
ParseError
)
import
Text.Parsec.String
(
Parser
)
import
Text.Parsec.Prim
(
Stream
,
ParsecT
)
import
qualified
Text.ParserCombinators.Parsec
(
parse
)
import
Text.ParserCombinators.Parsec
(
many1
,
noneOf
,
anyChar
,
char
,
oneOf
)
import
Text.XML.HXT.DOM.Util
(
decimalStringToInt
)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
...
...
@@ -74,19 +79,18 @@ parserLang EN = DC.EN
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate
1
:: Context -> Text -> SomeErrorHandling Text
-- parseDate
Raw
:: Context -> Text -> SomeErrorHandling Text
-- TODO error handling
parseDate
1
::
Lang
->
Text
->
IO
Text
parseDate
1
lang
text
=
do
parseDate
Raw
::
Lang
->
Text
->
IO
Text
parseDate
Raw
lang
text
=
do
maybeJson
<-
map
jsonValue
<$>
parseDateWithDuckling
lang
text
case
headMay
maybeJson
of
Just
(
Json
.
Object
object
)
->
case
HM
.
lookup
"value"
object
of
Just
(
Json
.
String
date
)
->
pure
date
Just
_
->
panic
"ParseDate ERROR: should be a json String"
Nothing
->
panic
"ParseDate ERROR: no date found"
_
->
panic
"ParseDate ERROR: type error"
Just
_
->
panic
"ParseDateRaw ERROR: should be a json String"
Nothing
->
panic
"ParseDateRaw ERROR: no date found"
_
->
panic
"ParseDateRaw ERROR: type error"
-- | Current Time in DucklingTime format
...
...
@@ -105,70 +109,64 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure
$
analyze
input
contxt
$
HashSet
.
fromList
[(
This
Time
)]
parseDate
::
Lang
->
Text
->
IO
[
Entity
]
parseDate
lang
input
=
do
context
<-
localContext
lang
<$>
utcToDucklingTime
<$>
getCurrentTime
pure
$
parse
input
context
[(
This
Time
)]
-- | Permit to transform a String to an Int in a monadic context
wrapDST
::
Monad
m
=>
String
->
m
Int
wrapDST
=
(
return
.
decimalStringToInt
)
--
wrapDST :: Monad m => String -> m Int
--
wrapDST = (return . decimalStringToInt)
-- | Generic parser which take at least one element not given in argument
many1NoneOf
::
Stream
s
m
Char
=>
[
Char
]
->
ParsecT
s
u
m
[
Char
]
many1NoneOf
=
(
many1
.
noneOf
)
--
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
--
many1NoneOf = (many1 . noneOf)
getMultiplicator
::
Int
->
Int
getMultiplicator
a
|
0
>=
a
=
1
|
otherwise
=
10
*
(
getMultiplicator
$
div
a
10
)
--
getMultiplicator :: Int -> Int
--
getMultiplicator a
--
| 0 >= a = 1
--
| otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
parseGregorian
::
Parser
Day
parseGregorian
=
do
y
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
m
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
d
<-
wrapDST
=<<
many1NoneOf
[
'T'
]
_
<-
char
'T'
return
$
fromGregorian
(
toInteger
y
)
m
d
-- | Parser for time format h:m:s
parseTimeOfDay
::
Parser
TimeOfDay
parseTimeOfDay
=
do
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
r
<-
many1NoneOf
[
'.'
]
_
<-
char
'.'
dec
<-
many1NoneOf
[
'+'
,
'-'
]
let
(
nb
,
l
)
=
(
decimalStringToInt
$
r
++
dec
,
length
dec
)
seconds
=
nb
*
10
^
(
12
-
l
)
return
$
TimeOfDay
h
m
(
MkFixed
.
toInteger
$
seconds
)
--
parseGregorian :: Parser Day
--
parseGregorian = do
--
y <- wrapDST =<< many1NoneOf ['-']
--
_ <- char '-'
--
m <- wrapDST =<< many1NoneOf ['-']
--
_ <- char '-'
--
d <- wrapDST =<< many1NoneOf ['T']
--
_ <- char 'T'
--
return $ fromGregorian (toInteger y) m d
--
--
--
| Parser for time format h:m:s
--
parseTimeOfDay :: Parser TimeOfDay
--
parseTimeOfDay = do
--
h <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
m <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
r <- many1NoneOf ['.']
--
_ <- char '.'
--
dec <- many1NoneOf ['+', '-']
--
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
--
seconds = nb * 10^(12-l)
--
return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
-- | Parser for timezone format +hh:mm
parseTimeZone
::
Parser
TimeZone
parseTimeZone
=
do
sign
<-
oneOf
[
'+'
,
'-'
]
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
(
many1
$
anyChar
)
let
timeInMinute
=
if
sign
==
'+'
then
h
*
60
+
m
else
-
h
*
60
-
m
in
return
$
TimeZone
timeInMinute
False
"CET"
-- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime
::
Parser
ZonedTime
parseZonedTime
=
do
d
<-
parseGregorian
tod
<-
parseTimeOfDay
tz
<-
parseTimeZone
return
$
ZonedTime
(
LocalTime
d
(
tod
))
tz
-- | Opposite of toRFC3339
fromRFC3339
::
Text
->
Either
ParseError
ZonedTime
fromRFC3339
t
=
Text
.
ParserCombinators
.
Parsec
.
parse
parseZonedTime
"ERROR: Couldn't parse zoned time."
input
where
input
=
unpack
t
--
parseTimeZone :: Parser TimeZone
--
parseTimeZone = do
--
sign <- oneOf ['+', '-']
--
h <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
m <- wrapDST =<< (many1 $ anyChar)
--
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
--
in return $ TimeZone timeInMinute False "CET"
--
--
--
| Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
--
parseZonedTime :: Parser ZonedTime
--
parseZonedTime= do
--
d <- parseGregorian
--
tod <- parseTimeOfDay
--
tz <- parseTimeZone
--
return $ ZonedTime (LocalTime d (tod)) tz
--
--
--
| Opposite of toRFC3339
--
fromRFC3339 :: Text -> Either ParseError ZonedTime
--
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
--
where input = unpack t
src/Gargantext/Text/Parsers/Wikimedia.hs
View file @
f40b051d
...
...
@@ -27,7 +27,7 @@ import Data.Text as T
import
Data.Either
-- | Use case
--
>>>
:{
-- :{
-- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment