Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
90d6620e
Commit
90d6620e
authored
Mar 31, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Regex error with Duckling
parent
f1538012
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
117 additions
and
37 deletions
+117
-37
install
bin/install
+2
-1
server
server
+1
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+10
-6
Date.hs
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
+70
-29
Types.hs
src/Gargantext/Core/Types.hs
+10
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+24
-0
No files found.
bin/install
View file @
90d6620e
#!/bin/bash
#stack install --nix --profile --test --fast --no-install-ghc --skip-ghc-check
stack
install
--nix
--test
--no-install-ghc
--skip-ghc-check
env
LANG
=
C.UTF-8 stack
install
--nix
--test
--no-install-ghc
--skip-ghc-check
server
View file @
90d6620e
...
...
@@ -6,4 +6,4 @@ LOGFILE=$FOLDER"/"$FILE
mkdir
-p
$FOLDER
~/.local/bin/gargantext-server
--ini
gargantext.ini
--run
Dev +RTS
>
$LOGFILE
2>&1 &
tail
-F
$LOGFILE
# -p
env
LANG
=
en_US.UTF-8
~/.local/bin/gargantext-server
--ini
gargantext.ini
--run
Dev +RTS
>
$LOGFILE
2>&1 &
tail
-F
$LOGFILE
# -p
src/Gargantext/API/Node/Corpus/New.hs
View file @
90d6620e
...
...
@@ -18,7 +18,6 @@ New corpus means either:
module
Gargantext.API.Node.Corpus.New
where
import
Conduit
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
...
...
@@ -38,8 +37,6 @@ import qualified Data.Text.Encoding as TE
-- import Test.QuickCheck (elements)
import
Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
addEvent
,
jobLogSuccess
,
jobLogFailTotal
)
...
...
@@ -49,8 +46,6 @@ import Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileType
(
..
),
parseFormatC
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
...
...
@@ -64,8 +59,11 @@ import Gargantext.Database.Prelude (hasConfig)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
gc_max_docs_parsers
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileType
(
..
),
parseFormatC
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
...
...
@@ -177,6 +175,8 @@ type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id
addToCorpusWithQuery
::
FlowCmdM
env
err
m
=>
User
->
CorpusId
...
...
@@ -216,8 +216,11 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
printDebug
"[G.A.N.C.New] getDataText with query"
q
eTxts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
let
lTxts
=
lefts
eTxts
printDebug
"[G.A.N.C.New] eTxts"
lTxts
case
lTxts
of
[]
->
do
let
txts
=
rights
eTxts
...
...
@@ -241,6 +244,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
}
(
err
:
_
)
->
do
printDebug
"Error: "
err
pure
$
addEvent
"ERROR"
(
T
.
pack
$
show
err
)
$
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
1
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Date.hs
View file @
90d6620e
...
...
@@ -20,10 +20,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Date
{-(parse, parseRaw, dateSplit, Year, Month, Day)-}
where
import
System.Environment
(
getEnv
)
import
Data.Aeson
(
toJSON
,
Value
)
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
as
HM
hiding
(
map
)
import
Data.Text
(
Text
,
unpack
,
splitOn
)
import
Data.Time
(
parseTimeOrError
,
defaultTimeLocale
,
toGregorian
)
import
Data.Text
(
Text
,
unpack
,
splitOn
,
replace
)
import
Data.Time
(
defaultTimeLocale
,
toGregorian
,
iso8601DateFormat
,
parseTimeM
)
import
Data.Time.Clock
(
UTCTime
(
..
),
getCurrentTime
)
import
Data.Time.LocalTime
(
utc
)
import
Data.Time.LocalTime.TimeZone.Series
(
zonedTimeToZoneSeriesTime
)
...
...
@@ -33,7 +35,9 @@ import Duckling.Types (Seal(..))
import
Duckling.Resolve
(
fromUTC
,
Context
(
Context
,
referenceTime
,
locale
),
DucklingTime
(
DucklingTime
),
Options
(
..
))
import
Duckling.Types
(
ResolvedToken
(
..
),
ResolvedVal
(
..
))
import
Gargantext.Core
(
Lang
(
FR
,
EN
))
import
Gargantext.Core.Types
(
DebugMode
(
..
),
withDebugMode
)
import
Gargantext.Prelude
import
qualified
Control.Exception
as
CE
import
qualified
Data.Aeson
as
Json
import
qualified
Data.HashSet
as
HashSet
import
qualified
Duckling.Core
as
DC
...
...
@@ -61,31 +65,55 @@ type Day = Int
-- | Date Parser
-- Parses dates mentions in full text given the language.
-- >>> parse
Date FR (pack "10 avril 1979
à 19H")
-- 19
79
-04-10 19:00:00 UTC
-- >>> parse
Date EN (pack "April 10 1979
")
-- 19
79
-04-10 00:00:00 UTC
-- >>> parse
FR (pack "10 avril 1900
à 19H")
-- 19
00
-04-10 19:00:00 UTC
-- >>> parse
EN (pack "April 10 1900
")
-- 19
00
-04-10 00:00:00 UTC
parse
::
Lang
->
Text
->
IO
UTCTime
parse
lang
s
=
parseDate'
"%Y-%m-%dT%T"
"0-0-0T0:0:0"
lang
s
parse
lang
s
=
do
dateStr'
<-
parseRawSafe
lang
s
case
dateFlow
dateStr'
of
DateFlowSuccess
ok
->
pure
ok
_
->
withDebugMode
(
DebugMode
True
)
"[G.C.T.P.T.Date parse]"
(
lang
,
s
)
$
getCurrentTime
defaultDate
::
Text
defaultDate
=
"0-0-0T0:0:0"
type
DateFormat
=
Text
type
DateDefault
=
Text
parseDate'
::
DateFormat
->
DateDefault
->
Lang
->
Text
->
IO
UTCTime
parseDate'
format
def
lang
s
=
do
dateStr'
<-
parseRaw
lang
s
if
dateStr'
==
""
then
getCurrentTime
else
do
let
dateStr
=
unpack
$
maybe
def
identity
$
head
$
splitOn
"."
dateStr'
pure
$
parseTimeOrError
True
defaultTimeLocale
(
unpack
format
)
dateStr
data
DateFlow
=
DucklingSuccess
{
ds_result
::
Text
}
|
DucklingFailure
{
df_result
::
Text
}
|
ReadFailure1
{
rf1_result
::
Text
}
|
ReadFailure2
{
rf2_result
::
Text
}
|
DateFlowSuccess
{
success
::
UTCTime
}
|
DateFlowFailure
deriving
Show
--{-
dateFlow
::
DateFlow
->
DateFlow
dateFlow
(
DucklingSuccess
res
)
=
case
(
head
$
splitOn
"."
res
)
of
Nothing
->
dateFlow
(
ReadFailure1
res
)
Just
re
->
case
readDate
res
of
Nothing
->
dateFlow
(
ReadFailure1
re
)
Just
ok
->
DateFlowSuccess
ok
dateFlow
(
DucklingFailure
txt
)
=
case
readDate
$
replace
" "
"T"
txt
of
Nothing
->
dateFlow
(
ReadFailure1
txt
)
Just
ok
->
DateFlowSuccess
ok
dateFlow
(
ReadFailure1
txt
)
=
case
readDate
txt
of
Nothing
->
DateFlowFailure
Just
ok
->
DateFlowSuccess
ok
dateFlow
_
=
DateFlowFailure
--}
readDate
::
Text
->
Maybe
UTCTime
readDate
txt
=
do
let
format
=
cs
$
iso8601DateFormat
(
Just
"%H:%M:%S"
)
parseTimeM
True
defaultTimeLocale
(
unpack
format
)
(
cs
txt
)
-- TODO add Paris at Duckling.Locale Region datatype
...
...
@@ -94,7 +122,7 @@ parseDate' format def lang s = do
parserLang
::
Lang
->
DC
.
Lang
parserLang
FR
=
DC
.
FR
parserLang
EN
=
DC
.
EN
parserLang
_
=
panic
"not implemented"
parserLang
lang
=
panic
$
"[G.C.T.C.P.Date] Lang not implemented"
<>
(
cs
$
show
lang
)
-- | Final Date parser API
-- IO can be avoided here:
...
...
@@ -102,6 +130,21 @@ parserLang _ = panic "not implemented"
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text
parseRawSafe
::
Lang
->
Text
->
IO
DateFlow
parseRawSafe
lang
text
=
do
triedParseRaw
<-
tryParseRaw
lang
text
dateStr'
<-
case
triedParseRaw
of
Left
(
CE
.
SomeException
err
)
->
do
envLang
<-
getEnv
"LANG"
printDebug
"[G.C.T.C.P.Date] Exception: "
(
err
,
envLang
,
lang
,
text
)
pure
$
DucklingFailure
text
Right
res
->
pure
$
DucklingSuccess
res
pure
dateStr'
tryParseRaw
::
CE
.
Exception
e
=>
Lang
->
Text
->
IO
(
Either
e
Text
)
tryParseRaw
lang
text
=
CE
.
try
(
parseRaw
lang
text
)
parseRaw
::
Lang
->
Text
->
IO
Text
parseRaw
lang
text
=
do
-- case result
maybeResult
<-
extractValue
<$>
getTimeValue
...
...
@@ -146,5 +189,3 @@ parseDateWithDuckling lang input options = do
-- TODO check/test Options False or True
pure
$
analyze
input
contxt
options
$
HashSet
.
fromList
[(
Seal
Time
)]
src/Gargantext/Core/Types.hs
View file @
90d6620e
...
...
@@ -16,6 +16,7 @@ commentary with @some markup@.
module
Gargantext.Core.Types
(
module
Gargantext
.
Core
.
Types
.
Main
,
module
Gargantext
.
Database
.
Admin
.
Types
.
Node
,
DebugMode
(
..
),
withDebugMode
,
Term
,
Terms
(
..
)
,
TokenTag
(
..
),
POS
(
..
),
NER
(
..
)
,
Label
,
Stems
...
...
@@ -29,6 +30,7 @@ module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
import
Control.Lens
(
Prism
'
,
(
#
),
makeLenses
,
over
)
import
Control.Monad.Except
(
MonadError
(
throwError
))
import
Debug.Trace
(
trace
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Hashable
(
Hashable
)
...
...
@@ -47,6 +49,14 @@ import Gargantext.Database.Admin.Types.Node
import
Gargantext.Prelude
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
------------------------------------------------------------------------
data
DebugMode
=
DebugMode
{
activated
::
Bool
}
withDebugMode
::
(
Show
a
)
=>
DebugMode
->
Text
->
a
->
b
->
b
withDebugMode
(
DebugMode
True
)
msg
var
a
=
trace
(
cs
$
"DEBUG"
<>
msg
<>
(
cs
$
show
var
))
a
withDebugMode
(
DebugMode
False
)
_
_
a
=
a
------------------------------------------------------------------------
data
Ordering
=
Down
|
Up
deriving
(
Enum
,
Show
,
Eq
,
Bounded
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
90d6620e
...
...
@@ -26,6 +26,7 @@ Portability : POSIX
module
Gargantext.Database.Action.Flow
-- (flowDatabase, ngrams2list)
(
DataText
(
..
)
,
getDataText
,
getDataText_Debug
,
flowDataText
,
flow
...
...
@@ -67,6 +68,8 @@ import System.FilePath (FilePath)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
qualified
Data.Map
as
Map
import
qualified
Data.Conduit.List
as
CL
import
qualified
Data.Conduit
as
C
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
import
Gargantext.Core
(
Lang
(
..
),
PosTagAlgo
(
..
))
...
...
@@ -134,6 +137,13 @@ data DataText = DataOld ![NodeId]
|
DataNew
!
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
)
-- | DataNew ![[HyperdataDocument]]
-- Show instance is not possible because of IO
printDataText
::
DataText
->
IO
()
printDataText
(
DataOld
xs
)
=
putStrLn
$
show
xs
printDataText
(
DataNew
(
maybeInt
,
conduitData
))
=
do
res
<-
C
.
runConduit
(
conduitData
.|
CL
.
consume
)
putStrLn
$
show
(
maybeInt
,
res
)
-- TODO use the split parameter in config file
getDataText
::
FlowCmdM
env
err
m
=>
DataOrigin
...
...
@@ -153,6 +163,20 @@ getDataText (InternalOrigin _) _la q _li = do
ids
<-
map
fst
<$>
searchDocInDatabase
cId
(
stemIt
q
)
pure
$
Right
$
DataOld
ids
getDataText_Debug
::
FlowCmdM
env
err
m
=>
DataOrigin
->
TermType
Lang
->
API
.
Query
->
Maybe
API
.
Limit
->
m
()
getDataText_Debug
a
l
q
li
=
do
result
<-
getDataText
a
l
q
li
case
result
of
Left
err
->
liftBase
$
putStrLn
$
show
err
Right
res
->
liftBase
$
printDataText
res
-------------------------------------------------------------------------------
flowDataText
::
forall
env
err
m
.
(
FlowCmdM
env
err
m
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment