Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f40b051d
Commit
f40b051d
authored
Oct 15, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DATE] parser -> UTCTime
parent
18ceac9c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
126 additions
and
124 deletions
+126
-124
package.yaml
package.yaml
+8
-7
Core.hs
src/Gargantext/Core.hs
+5
-1
Parsers.hs
src/Gargantext/Text/Parsers.hs
+10
-11
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+102
-104
Wikimedia.hs
src/Gargantext/Text/Parsers/Wikimedia.hs
+1
-1
No files found.
package.yaml
View file @
f40b051d
...
...
@@ -169,12 +169,12 @@ executables:
main
:
Main.hs
source-dirs
:
bin/gargantext-server
ghc-options
:
-
-threaded
-
-rtsopts
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
-
-Wcompat
-
-Wmissing-signatures
-
-rtsopts
-
-threaded
-
-with-rtsopts=-N
dependencies
:
-
base
-
containers
...
...
@@ -232,11 +232,12 @@ tests:
main
:
Main.hs
source-dirs
:
src-doctest
ghc-options
:
-
-Werror
-
-threaded
-
-O2
-
-Wcompat
-
-Wmissing-signatures
-
-rtsopts
-
-threaded
-
-with-rtsopts=-N
-
-Wmissing-signatures
dependencies
:
-
doctest
-
Glob
...
...
src/Gargantext/Core.hs
View file @
f40b051d
...
...
@@ -25,8 +25,12 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data
Lang
=
EN
|
FR
-- | DE | SP | CH
-- | All languages supported
-- TODO : DE | SP | CH
data
Lang
=
EN
|
FR
deriving
(
Show
,
Eq
,
Ord
,
Bounded
,
Enum
)
allLangs
::
[
Lang
]
allLangs
=
[
minBound
..
]
src/Gargantext/Text/Parsers.hs
View file @
f40b051d
...
...
@@ -19,12 +19,13 @@ please follow the types.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE PackageImports #-}
module
Gargantext.Text.Parsers
-- (parse, FileFormat(..)
)
module
Gargantext.Text.Parsers
(
parse
,
FileFormat
(
..
),
clean
)
where
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
"zip"
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
Data.Either.Extra
(
partitionEithers
)
import
Data.List
(
concat
)
...
...
@@ -49,15 +50,13 @@ import Gargantext.Prelude
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
------------------------------------------------------------------------
type
ParseError
=
String
type
Field
=
Text
type
Document
=
DM
.
Map
Field
Text
type
FilesParsed
=
DM
.
Map
FilePath
FileParsed
data
FileParsed
=
FileParsed
{
_fileParsed_errors
::
Maybe
ParseError
,
_fileParsed_result
::
[
Document
]
}
deriving
(
Show
)
--type Field = Text
--type Document = DM.Map Field Text
--type FilesParsed = DM.Map FilePath FileParsed
--data FileParsed = FileParsed { _fileParsed_errors :: Maybe ParseError
-- , _fileParsed_result :: [Document]
-- } deriving (Show)
-- | According to the format of Input file,
...
...
@@ -85,7 +84,7 @@ parse format path = do
-- | withParser:
-- According t
he format of the text, choosing
the right parser.
-- According t
o the format of the text, choose
the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser
::
FileFormat
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
WOS
=
wosParser
...
...
src/Gargantext/Text/Parsers/Date.hs
View file @
f40b051d
...
...
@@ -7,61 +7,66 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate
1
returns date as Text:
According to the language of the text, parseDate
Raw
returns date as Text:
TODO : Add some tests
import Gargantext.Parsers.Date as DGP
DGP.parseDate
1
DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
import Gargantext.
Text.
Parsers.Date as DGP
DGP.parseDate
Raw
DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleContexts
#-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Parsers.Date
(
parseDate1
,
parseDate
,
fromRFC3339
,
parseTimeOfDay
,
getMultiplicator
)
where
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Prelude
import
Prelude
(
toInteger
,
div
,
otherwise
,
(
++
))
--import Gargantext.Types.Main as G
module
Gargantext.Text.Parsers.Date
(
parseDate
,
parseDateRaw
)
where
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.Text
(
Text
,
unpack
,
splitOn
)
import
Data.Time
(
parseTimeOrError
,
defaultTimeLocale
)
import
Data.Time.Clock
(
UTCTime
,
getCurrentTime
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Data.Time.LocalTime
(
utc
)
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
)
,
DucklingTime
(
DucklingTime
)
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
import
Duckling.Api
(
analyze
)
import
Duckling.Core
(
makeLocale
,
Some
(
This
),
Dimension
(
Time
))
import
qualified
Duckling.Core
as
DC
import
Duckling.Types
(
jsonValue
,
Entity
)
import
Duckling.Api
(
analyze
,
parse
)
import
qualified
Data.HashSet
as
HashSet
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
),
DucklingTime
(
DucklingTime
))
import
Duckling.Types
(
ResolvedToken
)
import
Duckling.Types
(
jsonValue
)
import
Gargantext.Core
(
Lang
(
FR
,
EN
)
)
import
Gargantext.Prelude
import
qualified
Data.Aeson
as
Json
import
Data.Time
(
ZonedTime
(
..
),
LocalTime
(
..
),
TimeZone
(
..
),
TimeOfDay
(
..
))
import
Data.Time.Calendar
(
Day
,
fromGregorian
)
import
Data.Fixed
(
Fixed
(
MkFixed
))
import
Data.Foldable
(
length
)
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
qualified
Data.HashSet
as
HashSet
import
qualified
Duckling.Core
as
DC
import
Control.Monad
((
=<<
))
import
Data.Either
(
Either
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
-- | Unused import (to parse Date Format, keeping it for maybe next steps)
-- import Control.Monad ((=<<))
-- import Data.Either (Either)
-- import Data.Fixed (Fixed (MkFixed))
-- import Data.Foldable (length)
-- import Data.String (String)
-- import Data.Time (ZonedTime(..), LocalTime(..), TimeZone(..), TimeOfDay(..))
-- import Data.Time.Calendar (Day, fromGregorian)
-- import Duckling.Debug as DB
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
-- import Prelude (toInteger, div, otherwise, (++))
-- import Text.Parsec.Error (ParseError)
-- import Text.Parsec.Prim (Stream, ParsecT)
-- import Text.Parsec.String (Parser)
-- import Text.ParserCombinators.Parsec (many1, noneOf, anyChar, char, oneOf)
-- import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- import qualified Text.ParserCombinators.Parsec (parse)
------------------------------------------------------------------------
parseDate
::
Lang
->
Text
->
IO
UTCTime
parseDate
lang
s
=
do
dateStr'
<-
parseDateRaw
lang
s
let
format
=
"%Y-%m-%dT%T"
let
dateStr
=
unpack
$
maybe
"0-0-0T0:0:0"
identity
$
head
$
splitOn
"."
dateStr'
pure
$
parseTimeOrError
True
defaultTimeLocale
format
dateStr
import
Duckling.Types
(
ResolvedToken
)
import
Safe
(
headMay
)
import
Text.Parsec.Error
(
ParseError
)
import
Text.Parsec.String
(
Parser
)
import
Text.Parsec.Prim
(
Stream
,
ParsecT
)
import
qualified
Text.ParserCombinators.Parsec
(
parse
)
import
Text.ParserCombinators.Parsec
(
many1
,
noneOf
,
anyChar
,
char
,
oneOf
)
import
Text.XML.HXT.DOM.Util
(
decimalStringToInt
)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
...
...
@@ -74,19 +79,18 @@ parserLang EN = DC.EN
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDate
1
:: Context -> Text -> SomeErrorHandling Text
-- parseDate
Raw
:: Context -> Text -> SomeErrorHandling Text
-- TODO error handling
parseDate
1
::
Lang
->
Text
->
IO
Text
parseDate
1
lang
text
=
do
parseDate
Raw
::
Lang
->
Text
->
IO
Text
parseDate
Raw
lang
text
=
do
maybeJson
<-
map
jsonValue
<$>
parseDateWithDuckling
lang
text
case
headMay
maybeJson
of
Just
(
Json
.
Object
object
)
->
case
HM
.
lookup
"value"
object
of
Just
(
Json
.
String
date
)
->
pure
date
Just
_
->
panic
"ParseDate ERROR: should be a json String"
Nothing
->
panic
"ParseDate ERROR: no date found"
_
->
panic
"ParseDate ERROR: type error"
Just
_
->
panic
"ParseDateRaw ERROR: should be a json String"
Nothing
->
panic
"ParseDateRaw ERROR: no date found"
_
->
panic
"ParseDateRaw ERROR: type error"
-- | Current Time in DucklingTime format
...
...
@@ -105,70 +109,64 @@ parseDateWithDuckling lang input = do
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure
$
analyze
input
contxt
$
HashSet
.
fromList
[(
This
Time
)]
parseDate
::
Lang
->
Text
->
IO
[
Entity
]
parseDate
lang
input
=
do
context
<-
localContext
lang
<$>
utcToDucklingTime
<$>
getCurrentTime
pure
$
parse
input
context
[(
This
Time
)]
-- | Permit to transform a String to an Int in a monadic context
wrapDST
::
Monad
m
=>
String
->
m
Int
wrapDST
=
(
return
.
decimalStringToInt
)
--
wrapDST :: Monad m => String -> m Int
--
wrapDST = (return . decimalStringToInt)
-- | Generic parser which take at least one element not given in argument
many1NoneOf
::
Stream
s
m
Char
=>
[
Char
]
->
ParsecT
s
u
m
[
Char
]
many1NoneOf
=
(
many1
.
noneOf
)
--
many1NoneOf :: Stream s m Char => [Char] -> ParsecT s u m [Char]
--
many1NoneOf = (many1 . noneOf)
getMultiplicator
::
Int
->
Int
getMultiplicator
a
|
0
>=
a
=
1
|
otherwise
=
10
*
(
getMultiplicator
$
div
a
10
)
--
getMultiplicator :: Int -> Int
--
getMultiplicator a
--
| 0 >= a = 1
--
| otherwise = 10 * (getMultiplicator $ div a 10)
-- | Parser for date format y-m-d
parseGregorian
::
Parser
Day
parseGregorian
=
do
y
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
m
<-
wrapDST
=<<
many1NoneOf
[
'-'
]
_
<-
char
'-'
d
<-
wrapDST
=<<
many1NoneOf
[
'T'
]
_
<-
char
'T'
return
$
fromGregorian
(
toInteger
y
)
m
d
-- | Parser for time format h:m:s
parseTimeOfDay
::
Parser
TimeOfDay
parseTimeOfDay
=
do
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
r
<-
many1NoneOf
[
'.'
]
_
<-
char
'.'
dec
<-
many1NoneOf
[
'+'
,
'-'
]
let
(
nb
,
l
)
=
(
decimalStringToInt
$
r
++
dec
,
length
dec
)
seconds
=
nb
*
10
^
(
12
-
l
)
return
$
TimeOfDay
h
m
(
MkFixed
.
toInteger
$
seconds
)
--
parseGregorian :: Parser Day
--
parseGregorian = do
--
y <- wrapDST =<< many1NoneOf ['-']
--
_ <- char '-'
--
m <- wrapDST =<< many1NoneOf ['-']
--
_ <- char '-'
--
d <- wrapDST =<< many1NoneOf ['T']
--
_ <- char 'T'
--
return $ fromGregorian (toInteger y) m d
--
--
--
| Parser for time format h:m:s
--
parseTimeOfDay :: Parser TimeOfDay
--
parseTimeOfDay = do
--
h <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
m <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
r <- many1NoneOf ['.']
--
_ <- char '.'
--
dec <- many1NoneOf ['+', '-']
--
let (nb, l) = (decimalStringToInt $ r ++ dec, length dec)
--
seconds = nb * 10^(12-l)
--
return $ TimeOfDay h m (MkFixed . toInteger $ seconds)
--
--
-- | Parser for timezone format +hh:mm
parseTimeZone
::
Parser
TimeZone
parseTimeZone
=
do
sign
<-
oneOf
[
'+'
,
'-'
]
h
<-
wrapDST
=<<
many1NoneOf
[
':'
]
_
<-
char
':'
m
<-
wrapDST
=<<
(
many1
$
anyChar
)
let
timeInMinute
=
if
sign
==
'+'
then
h
*
60
+
m
else
-
h
*
60
-
m
in
return
$
TimeZone
timeInMinute
False
"CET"
-- | Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
parseZonedTime
::
Parser
ZonedTime
parseZonedTime
=
do
d
<-
parseGregorian
tod
<-
parseTimeOfDay
tz
<-
parseTimeZone
return
$
ZonedTime
(
LocalTime
d
(
tod
))
tz
-- | Opposite of toRFC3339
fromRFC3339
::
Text
->
Either
ParseError
ZonedTime
fromRFC3339
t
=
Text
.
ParserCombinators
.
Parsec
.
parse
parseZonedTime
"ERROR: Couldn't parse zoned time."
input
where
input
=
unpack
t
--
parseTimeZone :: Parser TimeZone
--
parseTimeZone = do
--
sign <- oneOf ['+', '-']
--
h <- wrapDST =<< many1NoneOf [':']
--
_ <- char ':'
--
m <- wrapDST =<< (many1 $ anyChar)
--
let timeInMinute = if sign == '+' then h * 60 + m else -h * 60 - m
--
in return $ TimeZone timeInMinute False "CET"
--
--
--
| Parser which use parseGregorian, parseTimeOfDay and parseTimeZone to create a ZonedTime
--
parseZonedTime :: Parser ZonedTime
--
parseZonedTime= do
--
d <- parseGregorian
--
tod <- parseTimeOfDay
--
tz <- parseTimeZone
--
return $ ZonedTime (LocalTime d (tod)) tz
--
--
--
| Opposite of toRFC3339
--
fromRFC3339 :: Text -> Either ParseError ZonedTime
--
fromRFC3339 t = Text.ParserCombinators.Parsec.parse parseZonedTime "ERROR: Couldn't parse zoned time." input
--
where input = unpack t
src/Gargantext/Text/Parsers/Wikimedia.hs
View file @
f40b051d
...
...
@@ -27,7 +27,7 @@ import Data.Text as T
import
Data.Either
-- | Use case
--
>>>
:{
-- :{
-- wikimediaFile <- BL.readFile "text.xml"
-- _ <- runConduit $ parseLBS def wikimediaFile
-- .| force "mediawiki required" parseMediawiki
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment