Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
cf6fbff6
Verified
Commit
cf6fbff6
authored
Sep 13, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactoring] more record syntax refactoring
parent
37a36aba
Changes
14
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
333 additions
and
296 deletions
+333
-296
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+19
-19
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+20
-19
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+19
-19
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+19
-19
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+168
-156
GrandDebat.hs
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
+20
-12
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+19
-12
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+8
-2
RIS.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
+2
-0
Presse.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
+0
-2
Wikimedia.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
+4
-2
List.hs
src/Gargantext/Core/Text/List.hs
+20
-20
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+2
-2
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+13
-12
No files found.
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
cf6fbff6
...
...
@@ -31,23 +31,23 @@ get la q ml = do
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
Just
d
)
pure
$
HyperdataDocument
(
Just
"Hal"
)
(
Just
$
pack
$
show
i
)
Nothing
Nothing
Nothing
Nothing
(
Just
$
intercalate
" "
t
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
)
(
Just
$
maybe
"Nothing"
identity
s
)
(
Just
$
intercalate
" "
ab
)
(
fmap
(
pack
.
show
)
utctime
)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(
Just
$
(
pack
.
show
)
la
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
,
_hd_doi
=
Just
$
pack
$
show
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
$
intercalate
" "
t
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
s
,
_hd_abstract
=
Just
$
intercalate
" "
ab
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
cf6fbff6
...
...
@@ -39,22 +39,23 @@ toDoc' la docs' = do
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
(
Just
.
pack
.
show
)
d
)
pure
$
HyperdataDocument
(
Just
"Istex"
)
(
Just
i
)
Nothing
Nothing
Nothing
Nothing
t
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
))
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
))
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
))
ab
(
fmap
(
pack
.
show
)
utctime
)
pub_year
pub_month
pub_day
Nothing
Nothing
Nothing
(
Just
$
(
pack
.
show
)
la
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
,
_hd_doi
=
Just
i
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
)
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
)
,
_hd_source
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
)
,
_hd_abstract
=
ab
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
cf6fbff6
...
...
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
(
PubMedDoc
.
PubMedDate
a
y
m
d
)
)
=
HyperdataDocument
(
Just
"PubMed"
)
Nothing
Nothing
Nothing
Nothing
Nothing
t
(
authors
aus
)
(
institutes
aus
)
j
(
abstract
as
)
(
Just
$
Text
.
pack
$
show
a
)
(
Just
$
fromIntegral
y
)
(
Just
m
)
(
Just
d
)
Nothing
Nothing
Nothing
(
Just
$
(
Text
.
pack
.
show
)
l
)
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"PubMed"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
,
_hd_authors
=
authors
aus
,
_hd_institutes
=
institutes
aus
,
_hd_source
=
j
,
_hd_abstract
=
abstract
as
,
_hd_publication_date
=
Just
$
Text
.
pack
$
show
a
,
_hd_publication_year
=
Just
$
fromIntegral
y
,
_hd_publication_month
=
Just
m
,
_hd_publication_day
=
Just
d
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
where
authors
::
Maybe
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
aus'
=
case
aus'
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
cf6fbff6
...
...
@@ -122,25 +122,25 @@ toDoc ff d = do
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
(
Just
$
DT
.
pack
$
show
ff
)
(
lookup
"doi"
d
)
(
lookup
"URL"
d
)
Nothing
Nothing
Nothing
(
lookup
"title"
d
)
Nothing
(
lookup
"authors"
d
)
(
lookup
"source"
d
)
(
lookup
"abstract"
d
)
(
fmap
(
DT
.
pack
.
show
)
utcTime
)
(
pub_year
)
(
pub_month
)
(
pub_day
)
Nothing
Nothing
Nothing
(
Just
$
(
DT
.
pack
.
show
)
lang
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
,
_hd_doi
=
lookup
"doi"
d
,
_hd_url
=
lookup
"URL"
d
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
lookup
"title"
d
,
_hd_authors
=
Nothing
,
_hd_institutes
=
lookup
"authors"
d
,
_hd_source
=
lookup
"source"
d
,
_hd_abstract
=
lookup
"abstract"
d
,
_hd_publication_date
=
fmap
(
DT
.
pack
.
show
)
utcTime
,
_hd_publication_year
=
pub_year
,
_hd_publication_month
=
pub_month
,
_hd_publication_day
=
pub_day
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
enrichWith
::
FileFormat
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
cf6fbff6
This diff is collapsed.
Click to expand it.
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
View file @
cf6fbff6
...
...
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
instance
ToHyperdataDocument
GrandDebatReference
where
toHyperdataDocument
(
GrandDebatReference
id'
_ref
title'
_createdAt'
publishedAt'
_updatedAt
_trashed
_trashedStatus
_authorId
authorType'
authorZipCode'
responses'
)
=
HyperdataDocument
(
Just
"GrandDebat"
)
id'
Nothing
Nothing
Nothing
Nothing
title'
authorType'
authorType'
authorZipCode'
(
toAbstract
<$>
responses'
)
publishedAt'
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
(
Just
$
Text
.
pack
$
show
FR
)
toHyperdataDocument
(
GrandDebatReference
{
id
,
title
,
publishedAt
,
authorType
,
authorZipCode
,
responses
})
=
HyperdataDocument
{
_hd_bdd
=
Just
"GrandDebat"
,
_hd_doi
=
id
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
title
,
_hd_authors
=
authorType
,
_hd_institutes
=
authorType
,
_hd_source
=
authorZipCode
,
_hd_abstract
=
toAbstract
<$>
responses
,
_hd_publication_date
=
publishedAt
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
}
where
toAbstract
=
(
Text
.
intercalate
" . "
)
.
((
filter
(
/=
""
))
.
(
map
toSentence
))
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
View file @
cf6fbff6
...
...
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
bind2doc
::
Lang
->
[
BindingValue
]
->
HyperdataDocument
bind2doc
l
[
link
,
date
,
langDoc
,
authors
,
_source
,
publisher
,
title
,
abstract
]
=
HyperdataDocument
(
Just
"Isidore"
)
Nothing
(
unbound
l
link
)
Nothing
Nothing
Nothing
(
unbound
l
title
)
(
unbound
l
authors
)
Nothing
(
unbound
l
publisher
)
(
unbound
l
abstract
)
(
unbound
l
date
)
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
(
unbound
l
langDoc
)
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
,
_hd_doi
=
Nothing
,
_hd_url
=
unbound
l
link
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
unbound
l
title
,
_hd_authors
=
unbound
l
authors
,
_hd_institutes
=
Nothing
,
_hd_source
=
unbound
l
publisher
,
_hd_abstract
=
unbound
l
abstract
,
_hd_publication_date
=
unbound
l
date
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
}
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
cf6fbff6
...
...
@@ -48,8 +48,14 @@ json2csv fin fout = do
writeFile
fout
(
headerCsvGargV3
,
fromList
$
map
patent2csvDoc
patents
)
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
patent2csvDoc
(
Patent
{
..
})
=
CsvDoc
{
csv_title
=
_patent_title
,
csv_source
=
"Source"
,
csv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
csv_publication_month
=
Just
1
,
csv_publication_day
=
Just
1
,
csv_abstract
=
_patent_abstract
,
csv_authors
=
"Authors"
}
...
...
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
View file @
cf6fbff6
...
...
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
onField
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
View file @
cf6fbff6
...
...
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
|
champs
==
"UR"
=
"url"
|
champs
==
"N2"
=
abstract
|
otherwise
=
champs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
View file @
cf6fbff6
...
...
@@ -95,7 +95,9 @@ parsePage =
revision
<-
parseRevision
many_
$
ignoreAnyTreeContent
return
$
Page
Mediawiki
title
revision
return
$
Page
{
_markupFormat
=
Mediawiki
,
_title
=
title
,
_text
=
revision
}
parseMediawiki
::
MonadThrow
m
=>
ConduitT
Event
Page
m
(
Maybe
()
)
parseMediawiki
=
...
...
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain
page
=
do
title
<-
mediaToPlain
$
_title
page
revision
<-
mediaToPlain
$
_text
page
return
$
Page
Plaintext
title
revision
return
$
Page
{
_markupFormat
=
Plaintext
,
_title
=
title
,
_text
=
revision
}
where
mediaToPlain
media
=
case
media
of
(
Nothing
)
->
return
Nothing
...
...
src/Gargantext/Core/Text/List.hs
View file @
cf6fbff6
...
...
@@ -86,7 +86,7 @@ buildNgramsLists user uCid mCid mfslw gp = do
data
MapListSize
=
MapListSize
{
unMapListSize
::
!
Int
}
buildNgramsOthersList
::
(
HasNodeError
err
buildNgramsOthersList
::
(
HasNodeError
err
,
CmdM
env
err
m
,
HasNodeStory
env
err
m
,
HasTreeError
err
...
...
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
cf6fbff6
...
...
@@ -72,7 +72,7 @@ groupWith :: GroupParams
->
NgramsTerm
->
NgramsTerm
groupWith
GroupIdentity
t
=
identity
t
groupWith
(
GroupParams
l
_m
_n
_
)
t
=
groupWith
(
GroupParams
{
unGroupParams_lang
=
l
}
)
t
=
NgramsTerm
$
Text
.
intercalate
" "
$
map
(
stem
l
)
...
...
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
$
unNgramsTerm
t
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith
(
GroupWithPosTag
_
_
m
)
t
=
groupWith
(
GroupWithPosTag
{
_gwl_map
=
m
}
)
t
=
case
HashMap
.
lookup
(
unNgramsTerm
t
)
m
of
Nothing
->
clean
t
Just
t'
->
clean
$
NgramsTerm
t'
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
cf6fbff6
...
...
@@ -82,11 +82,11 @@ makeLenses ''TermType
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
(
Unsupervised
l
n
s
m
)
xs
=
mapM
(
terms
(
Unsupervised
l
n
s
(
Just
m'
)
))
xs
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
))
xs
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Just
m''
->
m''
Nothing
->
newTries
n
(
Text
.
intercalate
" "
xs
)
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
...
...
@@ -96,11 +96,12 @@ withLang :: (Foldable t, Functor t, HasText h)
=>
TermType
Lang
->
t
h
->
TermType
Lang
withLang
(
Unsupervised
l
n
s
m
)
ns
=
Unsupervised
l
n
s
m'
withLang
(
Unsupervised
{
..
})
ns
=
Unsupervised
{
_tt_model
=
m'
,
..
}
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Nothing
->
-- trace ("buildTries here" :: String)
Just
$
buildTries
n
$
fmap
toToken
Just
$
buildTries
_tt_ngramsSize
$
fmap
toToken
$
uniText
$
Text
.
intercalate
" . "
$
List
.
concat
...
...
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
Unsupervised
lang
n
s
m
)
txt
=
termsUnsupervised
(
Unsupervised
lang
n
s
(
Just
m'
)
)
txt
terms
(
Unsupervised
{
..
})
txt
=
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
)
txt
where
m'
=
maybe
(
newTries
n
txt
)
identity
m
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment