Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
cf6fbff6
Verified
Commit
cf6fbff6
authored
Sep 13, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[refactoring] more record syntax refactoring
parent
37a36aba
Pipeline
#1802
passed with stage
in 33 minutes and 36 seconds
Changes
14
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
333 additions
and
296 deletions
+333
-296
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+19
-19
Istex.hs
src/Gargantext/Core/Text/Corpus/API/Istex.hs
+20
-19
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+19
-19
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+19
-19
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+168
-156
GrandDebat.hs
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
+20
-12
Isidore.hs
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
+19
-12
Json2Csv.hs
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
+8
-2
RIS.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
+2
-0
Presse.hs
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
+0
-2
Wikimedia.hs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
+4
-2
List.hs
src/Gargantext/Core/Text/List.hs
+20
-20
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+2
-2
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+13
-12
No files found.
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
cf6fbff6
...
@@ -31,23 +31,23 @@ get la q ml = do
...
@@ -31,23 +31,23 @@ get la q ml = do
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
Just
d
)
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
Just
d
)
pure
$
HyperdataDocument
(
Just
"Hal"
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Hal"
(
Just
$
pack
$
show
i
)
,
_hd_doi
=
Just
$
pack
$
show
i
Nothing
,
_hd_url
=
Nothing
Nothing
,
_hd_uniqId
=
Nothing
Nothing
,
_hd_uniqIdBdd
=
Nothing
Nothing
,
_hd_page
=
Nothing
(
Just
$
intercalate
" "
t
)
,
_hd_title
=
Just
$
intercalate
" "
t
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
)
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
aus
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
)
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
$
affs
<>
map
(
cs
.
show
)
struct_id
(
Just
$
maybe
"Nothing"
identity
s
)
,
_hd_source
=
Just
$
maybe
"Nothing"
identity
s
(
Just
$
intercalate
" "
ab
)
,
_hd_abstract
=
Just
$
intercalate
" "
ab
(
fmap
(
pack
.
show
)
utctime
)
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
pub_year
,
_hd_publication_year
=
pub_year
pub_month
,
_hd_publication_month
=
pub_month
pub_day
,
_hd_publication_day
=
pub_day
Nothing
,
_hd_publication_hour
=
Nothing
Nothing
,
_hd_publication_minute
=
Nothing
Nothing
,
_hd_publication_second
=
Nothing
(
Just
$
(
pack
.
show
)
la
)
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Istex.hs
View file @
cf6fbff6
...
@@ -39,22 +39,23 @@ toDoc' la docs' = do
...
@@ -39,22 +39,23 @@ toDoc' la docs' = do
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
::
Lang
->
ISTEX
.
Document
->
IO
HyperdataDocument
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
toDoc
la
(
ISTEX
.
Document
i
t
a
ab
d
s
)
=
do
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
(
Just
.
pack
.
show
)
d
)
(
utctime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
la
(
maybe
(
Just
"2019"
)
(
Just
.
pack
.
show
)
d
)
pure
$
HyperdataDocument
(
Just
"Istex"
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
"Istex"
(
Just
i
)
,
_hd_doi
=
Just
i
Nothing
,
_hd_url
=
Nothing
Nothing
,
_hd_uniqId
=
Nothing
Nothing
,
_hd_uniqIdBdd
=
Nothing
Nothing
,
_hd_page
=
Nothing
t
,
_hd_title
=
t
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
))
,
_hd_authors
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
map
ISTEX
.
_author_name
a
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
))
,
_hd_institutes
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
concat
$
(
map
ISTEX
.
_author_affiliations
)
a
)
(
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
))
,
_hd_source
=
Just
$
foldl
(
\
x
y
->
x
<>
", "
<>
y
)
""
(
catMaybes
$
map
ISTEX
.
_source_title
s
)
ab
,
_hd_abstract
=
ab
(
fmap
(
pack
.
show
)
utctime
)
,
_hd_publication_date
=
fmap
(
pack
.
show
)
utctime
pub_year
,
_hd_publication_year
=
pub_year
pub_month
,
_hd_publication_month
=
pub_month
pub_day
,
_hd_publication_day
=
pub_day
Nothing
,
_hd_publication_hour
=
Nothing
Nothing
,
_hd_publication_minute
=
Nothing
Nothing
,
_hd_publication_second
=
Nothing
(
Just
$
(
pack
.
show
)
la
)
,
_hd_language_iso2
=
Just
$
(
pack
.
show
)
la
}
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
cf6fbff6
...
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
...
@@ -38,25 +38,25 @@ get q l = either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
(
PubMedDoc
.
PubMedDate
a
y
m
d
)
(
PubMedDoc
.
PubMedDate
a
y
m
d
)
)
=
HyperdataDocument
(
Just
"PubMed"
)
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"PubMed"
Nothing
,
_hd_doi
=
Nothing
Nothing
,
_hd_url
=
Nothing
Nothing
,
_hd_uniqId
=
Nothing
Nothing
,
_hd_uniqIdBdd
=
Nothing
Nothing
,
_hd_page
=
Nothing
t
,
_hd_title
=
t
(
authors
aus
)
,
_hd_authors
=
authors
aus
(
institutes
aus
)
,
_hd_institutes
=
institutes
aus
j
,
_hd_source
=
j
(
abstract
as
)
,
_hd_abstract
=
abstract
as
(
Just
$
Text
.
pack
$
show
a
)
,
_hd_publication_date
=
Just
$
Text
.
pack
$
show
a
(
Just
$
fromIntegral
y
)
,
_hd_publication_year
=
Just
$
fromIntegral
y
(
Just
m
)
,
_hd_publication_month
=
Just
m
(
Just
d
)
,
_hd_publication_day
=
Just
d
Nothing
,
_hd_publication_hour
=
Nothing
Nothing
,
_hd_publication_minute
=
Nothing
Nothing
,
_hd_publication_second
=
Nothing
(
Just
$
(
Text
.
pack
.
show
)
l
)
,
_hd_language_iso2
=
Just
$
(
Text
.
pack
.
show
)
l
}
where
where
authors
::
Maybe
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
::
Maybe
[
PubMedDoc
.
Author
]
->
Maybe
Text
authors
aus'
=
case
aus'
of
authors
aus'
=
case
aus'
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
cf6fbff6
...
@@ -122,25 +122,25 @@ toDoc ff d = do
...
@@ -122,25 +122,25 @@ toDoc ff d = do
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
(
Just
$
DT
.
pack
$
show
ff
)
pure
$
HyperdataDocument
{
_hd_bdd
=
Just
$
DT
.
pack
$
show
ff
(
lookup
"doi"
d
)
,
_hd_doi
=
lookup
"doi"
d
(
lookup
"URL"
d
)
,
_hd_url
=
lookup
"URL"
d
Nothing
,
_hd_uniqId
=
Nothing
Nothing
,
_hd_uniqIdBdd
=
Nothing
Nothing
,
_hd_page
=
Nothing
(
lookup
"title"
d
)
,
_hd_title
=
lookup
"title"
d
Nothing
,
_hd_authors
=
Nothing
(
lookup
"authors"
d
)
,
_hd_institutes
=
lookup
"authors"
d
(
lookup
"source"
d
)
,
_hd_source
=
lookup
"source"
d
(
lookup
"abstract"
d
)
,
_hd_abstract
=
lookup
"abstract"
d
(
fmap
(
DT
.
pack
.
show
)
utcTime
)
,
_hd_publication_date
=
fmap
(
DT
.
pack
.
show
)
utcTime
(
pub_year
)
,
_hd_publication_year
=
pub_year
(
pub_month
)
,
_hd_publication_month
=
pub_month
(
pub_day
)
,
_hd_publication_day
=
pub_day
Nothing
,
_hd_publication_hour
=
Nothing
Nothing
,
_hd_publication_minute
=
Nothing
Nothing
,
_hd_publication_second
=
Nothing
(
Just
$
(
DT
.
pack
.
show
)
lang
)
,
_hd_language_iso2
=
Just
$
(
DT
.
pack
.
show
)
lang
}
enrichWith
::
FileFormat
enrichWith
::
FileFormat
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
->
(
a
,
[[[(
DB
.
ByteString
,
DB
.
ByteString
)]]])
->
(
a
,
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
cf6fbff6
This diff is collapsed.
Click to expand it.
src/Gargantext/Core/Text/Corpus/Parsers/GrandDebat.hs
View file @
cf6fbff6
...
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
...
@@ -75,18 +75,26 @@ instance ToJSON GrandDebatReference
instance
ToHyperdataDocument
GrandDebatReference
instance
ToHyperdataDocument
GrandDebatReference
where
where
toHyperdataDocument
(
GrandDebatReference
id'
_ref
title'
toHyperdataDocument
(
GrandDebatReference
{
id
,
title
,
publishedAt
,
authorType
,
authorZipCode
,
responses
})
=
_createdAt'
publishedAt'
_updatedAt
HyperdataDocument
{
_hd_bdd
=
Just
"GrandDebat"
_trashed
_trashedStatus
,
_hd_doi
=
id
_authorId
authorType'
authorZipCode'
,
_hd_url
=
Nothing
responses'
)
=
,
_hd_uniqId
=
Nothing
HyperdataDocument
(
Just
"GrandDebat"
)
id'
,
_hd_uniqIdBdd
=
Nothing
Nothing
Nothing
Nothing
Nothing
,
_hd_page
=
Nothing
title'
authorType'
authorType'
authorZipCode'
,
_hd_title
=
title
(
toAbstract
<$>
responses'
)
,
_hd_authors
=
authorType
publishedAt'
,
_hd_institutes
=
authorType
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
,
_hd_source
=
authorZipCode
(
Just
$
Text
.
pack
$
show
FR
)
,
_hd_abstract
=
toAbstract
<$>
responses
,
_hd_publication_date
=
publishedAt
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
Text
.
pack
$
show
FR
}
where
where
toAbstract
=
(
Text
.
intercalate
" . "
)
.
((
filter
(
/=
""
))
.
(
map
toSentence
))
toAbstract
=
(
Text
.
intercalate
" . "
)
.
((
filter
(
/=
""
))
.
(
map
toSentence
))
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
toSentence
(
GrandDebatResponse
_id
_qtitle
_qvalue
r
)
=
case
r
of
...
...
src/Gargantext/Core/Text/Corpus/Parsers/Isidore.hs
View file @
cf6fbff6
...
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
...
@@ -119,17 +119,24 @@ unbound _ _ = Nothing
bind2doc
::
Lang
->
[
BindingValue
]
->
HyperdataDocument
bind2doc
::
Lang
->
[
BindingValue
]
->
HyperdataDocument
bind2doc
l
[
link
,
date
,
langDoc
,
authors
,
_source
,
publisher
,
title
,
abstract
]
=
bind2doc
l
[
link
,
date
,
langDoc
,
authors
,
_source
,
publisher
,
title
,
abstract
]
=
HyperdataDocument
(
Just
"Isidore"
)
HyperdataDocument
{
_hd_bdd
=
Just
"Isidore"
Nothing
,
_hd_doi
=
Nothing
(
unbound
l
link
)
,
_hd_url
=
unbound
l
link
Nothing
Nothing
Nothing
,
_hd_uniqId
=
Nothing
(
unbound
l
title
)
,
_hd_uniqIdBdd
=
Nothing
(
unbound
l
authors
)
,
_hd_page
=
Nothing
Nothing
,
_hd_title
=
unbound
l
title
(
unbound
l
publisher
)
,
_hd_authors
=
unbound
l
authors
(
unbound
l
abstract
)
,
_hd_institutes
=
Nothing
(
unbound
l
date
)
,
_hd_source
=
unbound
l
publisher
Nothing
Nothing
Nothing
Nothing
Nothing
Nothing
,
_hd_abstract
=
unbound
l
abstract
(
unbound
l
langDoc
)
,
_hd_publication_date
=
unbound
l
date
,
_hd_publication_year
=
Nothing
,
_hd_publication_month
=
Nothing
,
_hd_publication_day
=
Nothing
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
unbound
l
langDoc
}
bind2doc
_
_
=
undefined
bind2doc
_
_
=
undefined
src/Gargantext/Core/Text/Corpus/Parsers/Json2Csv.hs
View file @
cf6fbff6
...
@@ -48,8 +48,14 @@ json2csv fin fout = do
...
@@ -48,8 +48,14 @@ json2csv fin fout = do
writeFile
fout
(
headerCsvGargV3
,
fromList
$
map
patent2csvDoc
patents
)
writeFile
fout
(
headerCsvGargV3
,
fromList
$
map
patent2csvDoc
patents
)
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
::
Patent
->
CsvDoc
patent2csvDoc
(
Patent
title
abstract
year
_
)
=
patent2csvDoc
(
Patent
{
..
})
=
CsvDoc
title
"Source"
(
Just
$
read
(
unpack
year
))
(
Just
1
)
(
Just
1
)
abstract
"Authors"
CsvDoc
{
csv_title
=
_patent_title
,
csv_source
=
"Source"
,
csv_publication_year
=
Just
$
read
(
unpack
_patent_year
)
,
csv_publication_month
=
Just
1
,
csv_publication_day
=
Just
1
,
csv_abstract
=
_patent_abstract
,
csv_authors
=
"Authors"
}
...
...
src/Gargantext/Core/Text/Corpus/Parsers/RIS.hs
View file @
cf6fbff6
...
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
...
@@ -70,3 +70,5 @@ onField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
->
[(
ByteString
,
ByteString
)]
onField
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
onField
k
f
m
=
m
<>
(
maybe
[]
f
(
lookup
k
m
)
)
src/Gargantext/Core/Text/Corpus/Parsers/RIS/Presse.hs
View file @
cf6fbff6
...
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
...
@@ -68,5 +68,3 @@ fixFields ns = map (first fixFields'') ns
|
champs
==
"UR"
=
"url"
|
champs
==
"UR"
=
"url"
|
champs
==
"N2"
=
abstract
|
champs
==
"N2"
=
abstract
|
otherwise
=
champs
|
otherwise
=
champs
src/Gargantext/Core/Text/Corpus/Parsers/Wikimedia.hs
View file @
cf6fbff6
...
@@ -95,7 +95,9 @@ parsePage =
...
@@ -95,7 +95,9 @@ parsePage =
revision
<-
revision
<-
parseRevision
parseRevision
many_
$
ignoreAnyTreeContent
many_
$
ignoreAnyTreeContent
return
$
Page
Mediawiki
title
revision
return
$
Page
{
_markupFormat
=
Mediawiki
,
_title
=
title
,
_text
=
revision
}
parseMediawiki
::
MonadThrow
m
=>
ConduitT
Event
Page
m
(
Maybe
()
)
parseMediawiki
::
MonadThrow
m
=>
ConduitT
Event
Page
m
(
Maybe
()
)
parseMediawiki
=
parseMediawiki
=
...
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
...
@@ -108,7 +110,7 @@ mediawikiPageToPlain :: Page -> IO Page
mediawikiPageToPlain
page
=
do
mediawikiPageToPlain
page
=
do
title
<-
mediaToPlain
$
_title
page
title
<-
mediaToPlain
$
_title
page
revision
<-
mediaToPlain
$
_text
page
revision
<-
mediaToPlain
$
_text
page
return
$
Page
Plaintext
title
revision
return
$
Page
{
_markupFormat
=
Plaintext
,
_title
=
title
,
_text
=
revision
}
where
mediaToPlain
media
=
where
mediaToPlain
media
=
case
media
of
case
media
of
(
Nothing
)
->
return
Nothing
(
Nothing
)
->
return
Nothing
...
...
src/Gargantext/Core/Text/List.hs
View file @
cf6fbff6
...
@@ -86,17 +86,17 @@ buildNgramsLists user uCid mCid mfslw gp = do
...
@@ -86,17 +86,17 @@ buildNgramsLists user uCid mCid mfslw gp = do
data
MapListSize
=
MapListSize
{
unMapListSize
::
!
Int
}
data
MapListSize
=
MapListSize
{
unMapListSize
::
!
Int
}
buildNgramsOthersList
::
(
HasNodeError
err
buildNgramsOthersList
::
(
HasNodeError
err
,
CmdM
env
err
m
,
CmdM
env
err
m
,
HasNodeStory
env
err
m
,
HasNodeStory
env
err
m
,
HasTreeError
err
,
HasTreeError
err
)
)
=>
User
=>
User
->
UserCorpusId
->
UserCorpusId
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
->
GroupParams
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
user
uCid
mfslw
_groupParams
(
nt
,
MapListSize
mapListSize
)
=
do
buildNgramsOthersList
user
uCid
mfslw
_groupParams
(
nt
,
MapListSize
mapListSize
)
=
do
allTerms
::
HashMap
NgramsTerm
(
Set
NodeId
)
<-
getNodesByNgramsUser
uCid
nt
allTerms
::
HashMap
NgramsTerm
(
Set
NodeId
)
<-
getNodesByNgramsUser
uCid
nt
...
@@ -106,7 +106,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
...
@@ -106,7 +106,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
$
HashMap
.
fromList
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
(
List
.
cycle
[
mempty
])
)
)
let
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists
allTerms
...
@@ -148,13 +148,13 @@ buildNgramsTermsList :: ( HasNodeError err
...
@@ -148,13 +148,13 @@ buildNgramsTermsList :: ( HasNodeError err
,
HasNodeStory
env
err
m
,
HasNodeStory
env
err
m
,
HasTreeError
err
,
HasTreeError
err
)
)
=>
User
=>
User
->
UserCorpusId
->
UserCorpusId
->
MasterCorpusId
->
MasterCorpusId
->
Maybe
FlowSocialListWith
->
Maybe
FlowSocialListWith
->
GroupParams
->
GroupParams
->
(
NgramsType
,
MapListSize
)
->
(
NgramsType
,
MapListSize
)
->
m
(
Map
NgramsType
[
NgramsElement
])
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
user
uCid
mCid
mfslw
groupParams
(
nt
,
_mapListSize
)
=
do
buildNgramsTermsList
user
uCid
mCid
mfslw
groupParams
(
nt
,
_mapListSize
)
=
do
-- Filter 0 With Double
-- Filter 0 With Double
...
@@ -170,7 +170,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
...
@@ -170,7 +170,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
$
HashMap
.
fromList
$
HashMap
.
fromList
$
List
.
zip
(
HashMap
.
keys
allTerms
)
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
(
List
.
cycle
[
mempty
])
)
)
printDebug
"[buldNgramsTermsList: Flow Social List / end]"
nt
printDebug
"[buldNgramsTermsList: Flow Social List / end]"
nt
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
...
...
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
cf6fbff6
...
@@ -72,7 +72,7 @@ groupWith :: GroupParams
...
@@ -72,7 +72,7 @@ groupWith :: GroupParams
->
NgramsTerm
->
NgramsTerm
->
NgramsTerm
->
NgramsTerm
groupWith
GroupIdentity
t
=
identity
t
groupWith
GroupIdentity
t
=
identity
t
groupWith
(
GroupParams
l
_m
_n
_
)
t
=
groupWith
(
GroupParams
{
unGroupParams_lang
=
l
}
)
t
=
NgramsTerm
NgramsTerm
$
Text
.
intercalate
" "
$
Text
.
intercalate
" "
$
map
(
stem
l
)
$
map
(
stem
l
)
...
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
...
@@ -86,7 +86,7 @@ groupWith (GroupParams l _m _n _) t =
$
unNgramsTerm
t
$
unNgramsTerm
t
-- | This lemmatization group done with CoreNLP algo (or others)
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith
(
GroupWithPosTag
_
_
m
)
t
=
groupWith
(
GroupWithPosTag
{
_gwl_map
=
m
}
)
t
=
case
HashMap
.
lookup
(
unNgramsTerm
t
)
m
of
case
HashMap
.
lookup
(
unNgramsTerm
t
)
m
of
Nothing
->
clean
t
Nothing
->
clean
t
Just
t'
->
clean
$
NgramsTerm
t'
Just
t'
->
clean
$
NgramsTerm
t'
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
cf6fbff6
...
@@ -82,11 +82,11 @@ makeLenses ''TermType
...
@@ -82,11 +82,11 @@ makeLenses ''TermType
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
(
Unsupervised
l
n
s
m
)
xs
=
mapM
(
terms
(
Unsupervised
l
n
s
(
Just
m'
)
))
xs
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
))
xs
where
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Just
m''
->
m''
Just
m''
->
m''
Nothing
->
newTries
n
(
Text
.
intercalate
" "
xs
)
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
...
@@ -96,15 +96,16 @@ withLang :: (Foldable t, Functor t, HasText h)
...
@@ -96,15 +96,16 @@ withLang :: (Foldable t, Functor t, HasText h)
=>
TermType
Lang
=>
TermType
Lang
->
t
h
->
t
h
->
TermType
Lang
->
TermType
Lang
withLang
(
Unsupervised
l
n
s
m
)
ns
=
Unsupervised
l
n
s
m'
withLang
(
Unsupervised
{
..
})
ns
=
Unsupervised
{
_tt_model
=
m'
,
..
}
where
where
m'
=
case
m
of
m'
=
case
_tt_model
of
Nothing
->
-- trace ("buildTries here" :: String)
Nothing
->
-- trace ("buildTries here" :: String)
Just
$
buildTries
n
$
fmap
toToken
Just
$
buildTries
_tt_ngramsSize
$
uniText
$
fmap
toToken
$
Text
.
intercalate
" . "
$
uniText
$
List
.
concat
$
Text
.
intercalate
" . "
$
map
hasText
ns
$
List
.
concat
$
map
hasText
ns
just_m
->
just_m
just_m
->
just_m
withLang
l
_
=
l
withLang
l
_
=
l
...
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
...
@@ -171,9 +172,9 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
Unsupervised
lang
n
s
m
)
txt
=
termsUnsupervised
(
Unsupervised
lang
n
s
(
Just
m'
)
)
txt
terms
(
Unsupervised
{
..
})
txt
=
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}
)
txt
where
where
m'
=
maybe
(
newTries
n
txt
)
identity
m
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment