Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
5c1b33ff
Commit
5c1b33ff
authored
Dec 12, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PARSER] Date + WOS parser.
parent
e887a14c
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
93 additions
and
29 deletions
+93
-29
Main.hs
app/Main.hs
+2
-2
gargantext.cabal
gargantext.cabal
+26
-19
package.yaml
package.yaml
+2
-0
Date.hs
src/Data/Gargantext/Parsers/Date.hs
+59
-5
WOS.hs
src/Data/Gargantext/Parsers/WOS.hs
+2
-3
Prelude.hs
src/Data/Gargantext/Prelude.hs
+2
-0
No files found.
app/Main.hs
View file @
5c1b33ff
module
Main
where
import
Hastext.Db
import
Gargantext.Parser.Wos
(
parseWos
)
-- | Program entry point: parse the WOS zip archive and print the counts
-- returned by 'parseWos'.
--
-- TODO: the archive path is hard-coded; take it from the command line.
main :: IO ()
main = do
  -- 'parseWos' yields @IO [Int]@, so its result is bound here rather than
  -- returned: returning it directly would not match the @IO ()@ signature.
  counts <- parseWos "/tmp/DeepNeuralNetworkFull.zip"
  print counts
gargantext.cabal
View file @
5c1b33ff
-- This file has been generated from package.yaml by hpack version 0.
18.1
.
-- This file has been generated from package.yaml by hpack version 0.
20.0
.
--
-- see: https://github.com/sol/hpack
--
-- hash: d9ae37baf58628321e1cf53c125f895c9fd3ff19c03fdfaa1ca9b7754fecabf9
name: gargantext
version: 0.1.0.0
...
...
@@ -21,28 +23,29 @@ library
hs-source-dirs:
src
build-depends:
extra
, text
, base >=4.7 && <5
, aeson
, attoparsec
aeson
, aeson-lens
, async
, attoparsec
, base >=4.7 && <5
, base16-bytestring
, bytestring
, case-insensitive
, containers
, contravariant
, conduit
, conduit-extra
, containers
, contravariant
, directory
, duckling
, extra
, filepath
, http-conduit
, lens
, logging-effect
, opaleye
, path
, parsec
, path
, path-io
, postgresql-simple
, pretty
, product-profunctors
...
...
@@ -50,6 +53,7 @@ library
, protolude
, pureMD5
, regex-compat
, safe
, semigroups
, servant
, servant-client
...
...
@@ -57,10 +61,11 @@ library
, servant-server
, split
, tagsoup
, text
, text-metrics
, time
, timezone-series
, time-locale-compat
, timezone-series
, transformers
, unordered-containers
, uuid
...
...
@@ -68,9 +73,8 @@ library
, wai
, warp
, yaml
, zlib
, zip
,
path-io
,
zlib
exposed-modules:
Data.Gargantext
Data.Gargantext.Analysis
...
...
@@ -118,13 +122,15 @@ test-suite garg-doctest
src-doctest
ghc-options: -Wall -Werror -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
, doctest
, Glob
Glob
, QuickCheck
, base
, doctest
, extra
, gargantext
, text
other-modules:
Paths_gargantext
default-language: Haskell2010
test-suite garg-test
...
...
@@ -134,12 +140,12 @@ test-suite garg-test
src-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
QuickCheck
, base
, extra
, gargantext
, hspec
,
QuickCheck
,
text
other-modules:
Ngrams.Lang
Ngrams.Lang.En
...
...
@@ -147,4 +153,5 @@ test-suite garg-test
Ngrams.Lang.Occurrences
Ngrams.Metrics
Parsers.WOS
Paths_gargantext
default-language: Haskell2010
package.yaml
View file @
5c1b33ff
...
...
@@ -57,6 +57,7 @@ library:
dependencies
:
-
base >=4.7 && <5
-
aeson
-
aeson-lens
-
attoparsec
-
async
-
base16-bytestring
...
...
@@ -82,6 +83,7 @@ library:
-
protolude
-
pureMD5
-
regex-compat
-
safe
-
semigroups
-
servant
-
servant-client
...
...
src/Data/Gargantext/Parsers/Date.hs
View file @
5c1b33ff
module
Data.Gargantext.Parsers.Date
where
{-|
Module : Data.Gargantext.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017
License : AGPL + CECILL v3
Maintainer : alexandre.delanoe@iscpif.fr
Stability : experimental
Portability : POSIX
Given the language of the text, parseDate1 returns (in IO) the parsed date as Text:
TODO : Add some tests
import Data.Gargantext.Parsers as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
module
Data.Gargantext.Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
))
where
import
Data.Gargantext.Prelude
import
qualified
Data.Gargantext.Types.Main
as
G
import
Data.Time.Clock
(
UTCTime
,
getCurrentTime
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
...
...
@@ -6,28 +26,62 @@ import Data.Time.LocalTime (utc)
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
)
,
DucklingTime
(
DucklingTime
)
)
--import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import
Duckling.Core
(
makeLocale
,
Lang
(),
Some
(
This
),
Dimension
(
Time
))
import
Duckling.Core
(
makeLocale
,
Lang
(
FR
,
EN
),
Some
(
This
),
Dimension
(
Time
))
import
Duckling.Types
(
jsonValue
)
--import qualified Duckling.Core as DC
import
Duckling.Api
(
analyze
)
import
qualified
Data.HashSet
as
HashSet
import
qualified
Data.Aeson
as
Json
import
Data.HashMap.Strict
as
HM
import
Data.Text
(
Text
)
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
import
Safe
(
headMay
)
import
Duckling.Types
(
ResolvedToken
)
-- TODO add Paris at Duckling.Locale Region datatype
-- | Translate Gargantext's own language type into the matching Duckling
-- 'Lang', keeping the two language vocabularies homogeneous.
-- TODO : put this in a more generic place in the source code
parserLang :: G.Language -> Lang
parserLang language = case language of
  G.FR -> FR
  G.EN -> EN
-- | Final Date parser API.
--
-- Runs 'parseDateWithDuckling' on the input, keeps the JSON value of the
-- first resolved token and returns the string stored under its @"value"@ key.
--
-- * No token at all: returns the literal text @"No date found"@.
-- * First token is not a JSON object, or its @"value"@ is missing or not a
--   JSON string: fails with 'error'.
--
-- NOTE(review): "No date found" is returned as ordinary text in one case and
-- raised through 'error' in another -- confirm this asymmetry is intended.
parseDate1 :: Lang -> Text -> IO Text
parseDate1 lang text = do
  tokens <- parseDateWithDuckling lang text
  -- presumably 'pm' is the project Prelude's alias for 'map' -- TODO confirm
  fromFirstJson (headMay (pm jsonValue tokens))
  where
    -- Inspect the JSON value of the first resolved token, if any.
    fromFirstJson Nothing                     = pure "No date found"
    fromFirstJson (Just (Json.Object fields)) = fromValueField (HM.lookup "value" fields)
    fromFirstJson (Just _)                    = error "ERROR: should be a json Object"

    -- Inspect the content of the "value" field of that object.
    fromValueField (Just (Json.String date)) = pure date
    fromValueField (Just _)                  = error "ERROR: should be a json String"
    fromValueField Nothing                   = error "No date found"
-- | Wrap a 'UTCTime' as a 'DucklingTime', passing 'utc' as the zone argument
-- of 'fromUTC'.
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime (zonedTimeToZoneSeriesTime zoned)
  where
    zoned = fromUTC time utc
-- | Build the Duckling 'Context' for a language and a reference time.
-- The locale is made from the language alone: 'makeLocale' receives
-- 'Nothing' as its second argument.
localContext :: Lang -> DucklingTime -> Context
localContext lang refTime = Context {locale = langLocale, referenceTime = refTime}
  where
    langLocale = makeLocale lang Nothing
parseDate
::
Lang
->
Text
->
IO
[
ResolvedToken
]
parseDate
lang
input
=
do
-- | Date parser with Duckling.
--
-- Uses the current time (read with 'getCurrentTime') as the reference time of
-- the context, and asks 'analyze' for the @Time@ dimension only.
parseDateWithDuckling :: Lang -> Text -> IO [ResolvedToken]
parseDateWithDuckling lang input = do
  now <- getCurrentTime
  let ctx        = localContext lang (utcToDucklingTime now)
      dimensions = HashSet.fromList [This Time]
  --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
  pure (analyze input ctx dimensions)
src/Data/Gargantext/Parsers/WOS.hs
View file @
5c1b33ff
...
...
@@ -50,7 +50,6 @@ endNotice = manyTill anyChar (string $ pack "\nER\n")
-- | Match the literal @"\\nPT "@ and return everything that follows it up to
-- (not including) the next end-of-line character.
startNotice :: Parser ByteString
startNotice = noticeMarker *> takeTill isEndOfLine
  where
    -- The marker is a string literal used directly as a parser, exactly as in
    -- the original expression.
    noticeMarker :: Parser ByteString
    noticeMarker = "\nPT "
field'
::
Parser
(
ByteString
,
[
ByteString
])
field'
=
do
f
<-
"
\n
"
*>
take
2
<*
" "
...
...
@@ -113,8 +112,8 @@ parseFile p x = case runParser p x of
Left
_
->
pure
0
Right
r
->
pure
$
length
r
-- | Run @parseFile WOS@ concurrently over everything 'zipFiles' yields for
-- the given path and collect the resulting counts.
-- 'parseFile' gives 0 when the parser fails, otherwise the length of the
-- parse result.
testWos :: FilePath -> IO [Int]
testWos fp = do
  -- presumably the contents of the entries of the zip archive -- TODO confirm
  entries <- zipFiles fp
  mapConcurrently (parseFile WOS) entries
-- | Run @parseFile WOS@ concurrently over everything 'zipFiles' yields for
-- the given path and collect the resulting counts.
-- 'parseFile' gives 0 when the parser fails, otherwise the length of the
-- parse result.
parseWos :: FilePath -> IO [Int]
parseWos fp = do
  -- presumably the contents of the entries of the zip archive -- TODO confirm
  entries <- zipFiles fp
  mapConcurrently (parseFile WOS) entries
src/Data/Gargantext/Prelude.hs
View file @
5c1b33ff
...
...
@@ -36,6 +36,8 @@ import qualified Data.List as L hiding (head, sum)
import
qualified
Control.Monad
as
M
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
V
import
Safe
(
headMay
)
-- | Short alias for list filtering: keep, in their original order, the
-- elements for which the predicate holds.
pf :: (a -> Bool) -> [a] -> [a]
pf keep xs = [x | x <- xs, keep x]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment