Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2ddd6408
Commit
2ddd6408
authored
Apr 26, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Alceste/Iramuteq Parser
parent
e49efe51
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
42 additions
and
13 deletions
+42
-13
gargantext.cabal
gargantext.cabal
+1
-0
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+24
-7
Iramuteq.hs
src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs
+17
-6
No files found.
gargantext.cabal
View file @
2ddd6408
...
...
@@ -192,6 +192,7 @@ library
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Gargantext.Core.Text.Corpus.Parsers.Isidore
Gargantext.Core.Text.Corpus.Parsers.Json2Csv
Gargantext.Core.Text.Corpus.Parsers.RIS
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
2ddd6408
...
...
@@ -44,6 +44,7 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseCsv
,
parseCsvC
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
System.IO.Temp
(
emptySystemTempFile
)
...
...
@@ -52,11 +53,12 @@ import qualified Data.ByteString.Char8 as DBC
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.Map
as
DM
import
qualified
Data.Text
as
DT
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Date
as
Date
import
qualified
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
as
Iramuteq
import
qualified
Gargantext.Core.Text.Corpus.Parsers.RIS
as
RIS
import
qualified
Gargantext.Core.Text.Corpus.Parsers.WOS
as
WOS
import
qualified
Prelude
import
Gargantext.Database.Query.Table.Ngrams
(
NgramsType
(
..
))
------------------------------------------------------------------------
type
ParseError
=
String
...
...
@@ -70,7 +72,12 @@ type ParseError = String
-- | Depending on the format of the input file,
-- different parsers are available.
data
FileType
=
WOS
|
RIS
|
RisPresse
|
CsvGargV3
|
CsvHal
data
FileType
=
WOS
|
RIS
|
RisPresse
|
CsvGargV3
|
CsvHal
|
Iramuteq
deriving
(
Show
)
-- Implemented (ISI Format)
...
...
@@ -177,6 +184,14 @@ parseFile WOS Plain p = do
docs
<-
join
$
mapM
(
toDoc
WOS
)
<$>
snd
<$>
enrichWith
WOS
<$>
readFileWith
WOS
p
pure
$
Right
docs
-- Iramuteq plain files: the file is read with 'readFileWith', passed
-- through 'enrichWith', and the second element of the resulting tuple is
-- kept. Before each entry is handed to 'toDoc', every "_" is replaced by
-- " " in the second component of each of its pairs. The result is always
-- wrapped in 'Right'.
parseFile
Iramuteq
Plain
p
=
do
docs
<-
join
$
mapM
((
toDoc
Iramuteq
)
.
(
map
(
second
(
Text
.
replace
"_"
" "
))))
<$>
snd
<$>
enrichWith
Iramuteq
<$>
readFileWith
Iramuteq
p
pure
$
Right
docs
parseFile
ff
_
p
=
do
docs
<-
join
$
mapM
(
toDoc
ff
)
<$>
snd
<$>
enrichWith
ff
<$>
readFileWith
ff
p
pure
$
Right
docs
...
...
@@ -217,6 +232,7 @@ enrichWith :: FileType
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
enrichWith
RisPresse
=
enrichWith'
presseEnrich
enrichWith
WOS
=
enrichWith'
(
map
(
first
WOS
.
keys
))
enrichWith
Iramuteq
=
enrichWith'
(
map
(
first
Iramuteq
.
keys
))
enrichWith
_
=
enrichWith'
identity
...
...
@@ -241,8 +257,9 @@ readFileWith format path = do
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileType -> Parser [Document]
withParser
::
FileType
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
WOS
=
WOS
.
parser
withParser
RIS
=
RIS
.
parser
withParser
WOS
=
WOS
.
parser
withParser
RIS
=
RIS
.
parser
withParser
Iramuteq
=
Iramuteq
.
parser
--withParser ODT = odtParser
--withParser XML = xmlParser
withParser
_
=
panic
"[ERROR] Parser not implemented yet"
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Iramuteq.hs
View file @
2ddd6408
...
...
@@ -12,7 +12,7 @@ commentary with @some markup@.
-}
module
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
(
parseIramuteqFile
,
notice
s
)
where
module
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
(
parseIramuteqFile
,
parser
,
key
s
)
where
import
Control.Applicative
import
Data.Attoparsec.ByteString
(
Parser
,
takeTill
,
parseOnly
)
...
...
@@ -21,14 +21,14 @@ import Data.ByteString (ByteString)
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
,
lines
,
concat
)
import
qualified
Data.ByteString
as
DB
parseIramuteqFile
::
String
->
IO
(
Either
String
[[(
ByteString
,
ByteString
)]])
parseIramuteqFile
::
FilePath
->
IO
(
Either
String
[[(
ByteString
,
ByteString
)]])
parseIramuteqFile
fp
=
do
txts
<-
DB
.
readFile
fp
pure
$
parseOnly
notices
txts
pure
$
parseOnly
parser
txts
-------------------------------------------------------------
notices
::
Parser
[[(
ByteString
,
ByteString
)]]
notices
=
do
parser
::
Parser
[[(
ByteString
,
ByteString
)]]
parser
=
do
ns
<-
(
many
notice
)
pure
ns
...
...
@@ -70,4 +70,15 @@ parseOf ptxt pa = bothParse <|> empty
where
bothParse
=
ptxt
>>=
constP
pa
-----------------------------------------------------------------
-- | Translate an Iramuteq field name into its replacement key.
-- A name with no entry below is returned unchanged.
--
-- NOTE: the source names listed here may not be constant across
-- Iramuteq file formats.
keys :: ByteString -> ByteString
keys field = case field of
  "id"    -> "doi"
  "qui"   -> "authors"
  "quand" -> "PY"
  "type"  -> "source"
  "titre" -> "title"
  "ou"    -> "institutes"
  "text"  -> "abstract"
  other   -> other
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment