Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f095ca6e
Commit
f095ca6e
authored
Mar 10, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[zip] implement zip for parsers
parent
d1ffbb9b
Pipeline
#2558
failed with stage
in 48 minutes and 11 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
29 additions
and
18 deletions
+29
-18
Main.hs
bin/gargantext-import/Main.hs
+3
-3
Main.hs
bin/gargantext-phylo/Main.hs
+2
-2
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+4
-4
FrameCalcUpload.hs
src/Gargantext/API/Node/FrameCalcUpload.hs
+2
-2
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+18
-7
No files found.
bin/gargantext-import/Main.hs
View file @
f095ca6e
...
...
@@ -35,7 +35,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (toHyperdataDocument)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
)
,
FileType
(
..
)
)
main
::
IO
()
main
=
do
...
...
@@ -51,10 +51,10 @@ main = do
Nothing
->
panic
$
"Cannot read limit: "
<>
(
Text
.
pack
limit
)
Just
l
->
l
corpus
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
format
corpusPath
Nothing
(
\
_
->
pure
()
)
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
format
Plain
corpusPath
Nothing
(
\
_
->
pure
()
)
corpusCsvHal
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
CsvHal
corpusPath
Nothing
(
\
_
->
pure
()
)
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
CsvHal
Plain
corpusPath
Nothing
(
\
_
->
pure
()
)
annuaire
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
UserName
$
cs
user
)
(
Left
"Annuaire"
)
(
Multi
EN
)
corpusPath
(
\
_
->
pure
()
)
...
...
bin/gargantext-phylo/Main.hs
View file @
f095ca6e
...
...
@@ -30,7 +30,7 @@ import GHC.IO (FilePath)
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
FileType
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
...
@@ -94,7 +94,7 @@ wosToDocs limit patterns time path = do
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
fromRight
[]
<$>
parseFile
WOS
(
path
<>
file
)
)
files
<$>
fromRight
[]
<$>
parseFile
WOS
Plain
(
path
<>
file
)
)
files
-- To transform a Csv file into a list of Document
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
f095ca6e
...
...
@@ -42,7 +42,7 @@ import Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
addEvent
,
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Job
(
addEvent
,
jobLogSuccess
,
jobLogFailTotal
)
import
Gargantext.API.Node.Corpus.New.Types
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
...
...
@@ -50,7 +50,7 @@ import Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
FileType
(
..
),
parseFormatC
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileType
(
..
),
parseFormatC
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
...
...
@@ -270,7 +270,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do
printDebug
"[addToCorpusWithForm] fileFormat"
ff
logStatus
jobLog
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
let
limit
=
fromIntegral
limit'
::
Integer
let
parseC
=
case
ft
of
CSV_HAL
->
Parser
.
parseFormatC
Parser
.
CsvHal
...
...
@@ -315,7 +315,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do
(
Multi
$
fromMaybe
EN
l
)
Nothing
--(Just $ fromIntegral $ length docs, docsC')
(
Just
0
,
docsC'
)
-- TODO fix number of docs
(
Just
0
,
transPipe
liftBase
docsC'
)
-- TODO fix number of docs
--(map (map toHyperdataDocument) docs)
(
logStatus
)
...
...
src/Gargantext/API/Node/FrameCalcUpload.hs
View file @
f095ca6e
...
...
@@ -20,7 +20,7 @@ import Web.FormUrlEncoded (FromForm)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Job
(
jobLogInit
,
jobLogSuccess
,
jobLogFail
)
import
Gargantext.API.Node.Corpus.New
(
addToCorpusWithForm
)
import
Gargantext.API.Node.Corpus.New.File
(
FileType
(
..
))
import
Gargantext.API.Node.Corpus.New.Types
(
FileFormat
(
..
),
FileType
(
..
))
import
Gargantext.API.Node.Types
(
NewWithForm
(
..
))
import
Gargantext.API.Prelude
import
Gargantext.Core.Types.Individu
(
User
(
..
))
...
...
@@ -87,6 +87,6 @@ frameCalcUploadAsync uId nId _f logStatus jobLog = do
jobLog2
<-
case
mCId
of
Nothing
->
pure
$
jobLogFail
jobLog
Just
cId
->
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
addToCorpusWithForm
(
RootId
(
NodeId
uId
))
cId
(
NewWithForm
CSV
Plain
body
Nothing
"calc-upload.csv"
)
logStatus
jobLog
pure
$
jobLogSuccess
jobLog2
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
f095ca6e
...
...
@@ -35,7 +35,7 @@ import Data.List (concat, lookup)
import
Data.Ord
()
import
Data.String
(
String
())
import
Data.String
()
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
intercalate
,
pack
,
unpack
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.Tuple.Extra
(
both
,
first
,
second
)
import
System.FilePath
(
FilePath
(),
takeExtension
)
...
...
@@ -70,8 +70,7 @@ type ParseError = String
-- | According to the format of Input file,
-- different parser are available.
data
FileType
=
WOS
|
RIS
|
RisPresse
|
CsvGargV3
|
CsvHal
data
FileType
=
WOS
|
RIS
|
RisPresse
|
CsvGargV3
|
CsvHal
deriving
(
Show
)
-- Implemented (ISI Format)
...
...
@@ -96,12 +95,24 @@ parseFormatC WOS Plain bs = do
.|
mapC
(
map
$
first
WOS
.
keys
)
.|
mapC
(
map
$
both
decodeUtf8
)
.|
mapMC
(
toDoc
WOS
))
<$>
eDocs
parseFormatC
_
ft
ZIP
bs
=
do
parseFormatC
ft
ZIP
bs
=
do
path
<-
liftBase
$
emptySystemTempFile
"parsed-zip"
liftBase
$
DB
.
writeFile
path
bs
parsedZip
<-
liftBase
$
withArchive
path
$
do
DM
.
keys
<$>
getEntries
pure
$
Left
$
"Not implemented for ZIP, parsedZip"
<>
show
parsedZip
fileContents
<-
liftBase
$
withArchive
path
$
do
files
<-
DM
.
keys
<$>
getEntries
mapM
getEntry
files
--printDebug "[parseFormatC] fileContents" fileContents
eContents
<-
mapM
(
parseFormatC
ft
Plain
)
fileContents
--printDebug "[parseFormatC] contents" contents
--pure $ Left $ "Not implemented for ZIP"
let
(
errs
,
contents
)
=
partitionEithers
eContents
case
errs
of
[]
->
case
contents
of
[]
->
pure
$
Left
"No files in zip"
_
->
pure
$
Right
$
(
sequenceConduits
contents
>>
pure
()
)
-- .| mapM_C (printDebug "[parseFormatC] doc")
_
->
pure
$
Left
$
unpack
$
intercalate
"\n"
$
pack
<$>
errs
parseFormatC
_
_
_
=
undefined
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment