Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
5c1b33ff
Commit
5c1b33ff
authored
Dec 12, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PARSER] Date + WOS parser.
parent
e887a14c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
93 additions
and
29 deletions
+93
-29
Main.hs
app/Main.hs
+2
-2
gargantext.cabal
gargantext.cabal
+26
-19
package.yaml
package.yaml
+2
-0
Date.hs
src/Data/Gargantext/Parsers/Date.hs
+59
-5
WOS.hs
src/Data/Gargantext/Parsers/WOS.hs
+2
-3
Prelude.hs
src/Data/Gargantext/Prelude.hs
+2
-0
No files found.
app/Main.hs
View file @
5c1b33ff
module
Main
where
import
Hastext.Db
import
Gargantext.Parser.Wos
(
parseWos
)
main
::
IO
()
main
=
fonction
main
=
parseWos
"/tmp/DeepNeuralNetworkFull.zip"
gargantext.cabal
View file @
5c1b33ff
-- This file has been generated from package.yaml by hpack version 0.
18.1
.
-- This file has been generated from package.yaml by hpack version 0.
20.0
.
--
-- see: https://github.com/sol/hpack
--
-- hash: d9ae37baf58628321e1cf53c125f895c9fd3ff19c03fdfaa1ca9b7754fecabf9
name: gargantext
version: 0.1.0.0
...
...
@@ -21,28 +23,29 @@ library
hs-source-dirs:
src
build-depends:
extra
, text
, base >=4.7 && <5
, aeson
, attoparsec
aeson
, aeson-lens
, async
, attoparsec
, base >=4.7 && <5
, base16-bytestring
, bytestring
, case-insensitive
, containers
, contravariant
, conduit
, conduit-extra
, containers
, contravariant
, directory
, duckling
, extra
, filepath
, http-conduit
, lens
, logging-effect
, opaleye
, path
, parsec
, path
, path-io
, postgresql-simple
, pretty
, product-profunctors
...
...
@@ -50,6 +53,7 @@ library
, protolude
, pureMD5
, regex-compat
, safe
, semigroups
, servant
, servant-client
...
...
@@ -57,10 +61,11 @@ library
, servant-server
, split
, tagsoup
, text
, text-metrics
, time
, timezone-series
, time-locale-compat
, timezone-series
, transformers
, unordered-containers
, uuid
...
...
@@ -68,9 +73,8 @@ library
, wai
, warp
, yaml
, zlib
, zip
,
path-io
,
zlib
exposed-modules:
Data.Gargantext
Data.Gargantext.Analysis
...
...
@@ -118,13 +122,15 @@ test-suite garg-doctest
src-doctest
ghc-options: -Wall -Werror -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
, doctest
, Glob
Glob
, QuickCheck
, base
, doctest
, extra
, gargantext
, text
other-modules:
Paths_gargantext
default-language: Haskell2010
test-suite garg-test
...
...
@@ -134,12 +140,12 @@ test-suite garg-test
src-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
extra
, text
QuickCheck
, base
, extra
, gargantext
, hspec
,
QuickCheck
,
text
other-modules:
Ngrams.Lang
Ngrams.Lang.En
...
...
@@ -147,4 +153,5 @@ test-suite garg-test
Ngrams.Lang.Occurrences
Ngrams.Metrics
Parsers.WOS
Paths_gargantext
default-language: Haskell2010
package.yaml
View file @
5c1b33ff
...
...
@@ -57,6 +57,7 @@ library:
dependencies
:
-
base >=4.7 && <5
-
aeson
-
aeson-lens
-
attoparsec
-
async
-
base16-bytestring
...
...
@@ -82,6 +83,7 @@ library:
-
protolude
-
pureMD5
-
regex-compat
-
safe
-
semigroups
-
servant
-
servant-client
...
...
src/Data/Gargantext/Parsers/Date.hs
View file @
5c1b33ff
module
Data.Gargantext.Parsers.Date
where
{-|
Module : Data.Gargantext.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017
License : AGPL + CECILL v3
Maintainer : alexandre.delanoe@iscpif.fr
Stability : experimental
Portability : POSIX
According to the language of the text, parseDate1 returns date as Text:
TODO : Add some tests
import Data.Gargantext.Parsers as DGP
DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
-}
module
Data.Gargantext.Parsers.Date
(
parseDate1
,
Lang
(
FR
,
EN
))
where
import
Data.Gargantext.Prelude
import
qualified
Data.Gargantext.Types.Main
as
G
import
Data.Time.Clock
(
UTCTime
,
getCurrentTime
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
...
...
@@ -6,28 +26,62 @@ import Data.Time.LocalTime (utc)
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
)
,
DucklingTime
(
DucklingTime
)
)
--import Duckling.Core (makeLocale, Lang(FR,EN), Some(This), Dimension(Time))
import
Duckling.Core
(
makeLocale
,
Lang
(),
Some
(
This
),
Dimension
(
Time
))
import
Duckling.Core
(
makeLocale
,
Lang
(
FR
,
EN
),
Some
(
This
),
Dimension
(
Time
))
import
Duckling.Types
(
jsonValue
)
--import qualified Duckling.Core as DC
import
Duckling.Api
(
analyze
)
import
qualified
Data.HashSet
as
HashSet
import
qualified
Data.Aeson
as
Json
import
Data.HashMap.Strict
as
HM
import
Data.Text
(
Text
)
-- import Duckling.Engine (parseAndResolve)
-- import Duckling.Rules (rulesFor)
-- import Duckling.Debug as DB
import
Safe
(
headMay
)
import
Duckling.Types
(
ResolvedToken
)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
parserLang
::
G
.
Language
->
Lang
parserLang
G
.
FR
=
FR
parserLang
G
.
EN
=
EN
-- | Final Date parser API
parseDate1
::
Lang
->
Text
->
IO
Text
parseDate1
lang
text
=
do
maybeJson
<-
pm
jsonValue
<$>
parseDateWithDuckling
lang
text
case
headMay
maybeJson
of
Just
(
Json
.
Object
object
)
->
case
HM
.
lookup
"value"
object
of
Just
(
Json
.
String
date
)
->
pure
date
Just
_
->
error
"ERROR: should be a json String"
Nothing
->
error
"No date found"
Just
_
->
error
"ERROR: should be a json Object"
Nothing
->
pure
"No date found"
-- | Current Time in DucklingTime format
-- TODO : get local Time in a more generic way
utcToDucklingTime
::
UTCTime
->
DucklingTime
utcToDucklingTime
time
=
DucklingTime
.
zonedTimeToZoneSeriesTime
$
fromUTC
time
utc
-- | Local Context which depends on Lang and Time
localContext
::
Lang
->
DucklingTime
->
Context
localContext
lang
dt
=
Context
{
referenceTime
=
dt
,
locale
=
makeLocale
lang
Nothing
}
parseDate
::
Lang
->
Text
->
IO
[
ResolvedToken
]
parseDate
lang
input
=
do
-- | Date parser with Duckling
parseDateWithDuckling
::
Lang
->
Text
->
IO
[
ResolvedToken
]
parseDateWithDuckling
lang
input
=
do
ctx
<-
localContext
lang
<$>
utcToDucklingTime
<$>
getCurrentTime
--pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
pure
$
analyze
input
ctx
$
HashSet
.
fromList
[(
This
Time
)]
src/Data/Gargantext/Parsers/WOS.hs
View file @
5c1b33ff
...
...
@@ -50,7 +50,6 @@ endNotice = manyTill anyChar (string $ pack "\nER\n")
startNotice
::
Parser
ByteString
startNotice
=
"
\n
PT "
*>
takeTill
isEndOfLine
field'
::
Parser
(
ByteString
,
[
ByteString
])
field'
=
do
f
<-
"
\n
"
*>
take
2
<*
" "
...
...
@@ -113,8 +112,8 @@ parseFile p x = case runParser p x of
Left
_
->
pure
0
Right
r
->
pure
$
length
r
test
Wos
::
FilePath
->
IO
[
Int
]
test
Wos
fp
=
join
$
mapConcurrently
(
parseFile
WOS
)
<$>
zipFiles
fp
parse
Wos
::
FilePath
->
IO
[
Int
]
parse
Wos
fp
=
join
$
mapConcurrently
(
parseFile
WOS
)
<$>
zipFiles
fp
src/Data/Gargantext/Prelude.hs
View file @
5c1b33ff
...
...
@@ -36,6 +36,8 @@ import qualified Data.List as L hiding (head, sum)
import
qualified
Control.Monad
as
M
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
V
import
Safe
(
headMay
)
pf
::
(
a
->
Bool
)
->
[
a
]
->
[
a
]
pf
=
filter
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment