Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
64f251bc
Commit
64f251bc
authored
May 27, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into dev-test
parents
55f89c13
67c34520
Pipeline
#1470
failed with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
33 additions
and
26 deletions
+33
-26
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+5
-5
Learn.hs
src/Gargantext/Core/Text/Learn.hs
+1
-1
List.hs
src/Gargantext/Core/Text/List.hs
+27
-20
No files found.
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
64f251bc
...
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Core.Text.Learn
(
detectLangDefault
)
--
import Gargantext.Core.Text.Learn (detectLangDefault)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Char8
as
DBC
...
...
@@ -103,11 +103,11 @@ parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
-- TODO use language for RIS
toDoc
ff
d
=
do
let
abstract
=
lookup
"abstract"
d
let
lang
=
maybe
EN
identity
(
join
$
detectLangDefault
<$>
(
fmap
(
DT
.
take
50
)
abstract
))
--
let abstract = lookup "abstract" d
let
lang
=
EN
--
maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let
dateToParse
=
DT
.
replace
"-"
" "
<$>
lookup
"PY"
d
<>
Just
" "
<>
lookup
"publication_date"
d
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
(
Just
$
DT
.
pack
$
show
ff
)
...
...
src/Gargantext/Core/Text/Learn.hs
View file @
64f251bc
...
...
@@ -112,7 +112,7 @@ detectLangDefault = detectCat 99 eventLang
textSample
::
Lang
->
String
textSample
EN
=
EN
.
textSample
textSample
FR
=
FR
.
textSample
textSample
_
=
panic
"
textSample:
not impl yet"
textSample
_
=
panic
"
[G.C.T.L:detectLangDefault]
not impl yet"
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
...
...
src/Gargantext/Core/Text/List.hs
View file @
64f251bc
...
...
@@ -256,41 +256,50 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
-- splitAt
let
-- use % of list if to big, or Int if to small
listSizeLocal
=
1000
::
Double
mapSize
=
1000
::
Double
canSize
=
mapSize
*
10
::
Double
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
(
both
(
HashMap
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
listSizeLocal
))
splitAt'
max'
n'
=
(
both
(
HashMap
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
max'
))
sortOn
f
=
(
List
.
sortOn
(
Down
.
(
view
(
gts'_score
.
f
))
.
snd
))
.
HashMap
.
toList
monoInc_size
n
=
splitAt'
n
$
monoSize
*
inclSize
/
2
multExc_size
n
=
splitAt'
n
$
multSize
*
exclSize
/
2
(
mapMonoScoredInclHead
,
monoScoredInclTail
)
=
monoInc_size
mapSize
$
(
sortOn
scored_genInc
)
monoScoredIncl
(
mapMonoScoredExclHead
,
monoScoredExclTail
)
=
monoInc_size
mapSize
$
(
sortOn
scored_speExc
)
monoScoredExcl
monoInc_size
=
splitAt'
$
monoSize
*
inclSize
/
2
(
monoScoredInclHead
,
_monoScoredInclTail
)
=
monoInc_size
$
(
sortOn
scored_genInc
)
monoScoredIncl
(
monoScoredExclHead
,
_monoScoredExclTail
)
=
monoInc_size
$
(
sortOn
scored_speExc
)
monoScoredExcl
(
mapMultScoredInclHead
,
multScoredInclTail
)
=
multExc_size
mapSize
$
(
sortOn
scored_genInc
)
multScoredIncl
(
mapMultScoredExclHead
,
multScoredExclTail
)
=
multExc_size
mapSize
$
(
sortOn
scored_speExc
)
multScoredExcl
multExc_size
=
splitAt'
$
multSize
*
exclSize
/
2
(
multScoredInclHead
,
multScoredInclTail
)
=
multExc_size
$
(
sortOn
scored_genInc
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
multExc_size
$
(
sortOn
scored_speExc
)
multScoredExcl
printDebug
"stopWords"
stopTerms
(
canMonoScoredIncHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn
scored_genInc
)
monoScoredInclTail
(
canMonoScoredExclHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn
scored_speExc
)
monoScoredExclTail
(
canMulScoredInclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn
scored_genInc
)
multScoredInclTail
(
canMultScoredExclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn
scored_speExc
)
multScoredExclTail
------------------------------------------------------------
-- Final Step building the Typed list
-- Candidates Terms need to be filtered
let
maps
=
setListType
(
Just
MapTerm
)
$
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
$
m
apM
onoScoredInclHead
<>
m
apM
onoScoredExclHead
<>
m
apM
ultScoredInclHead
<>
m
apM
ultScoredExclHead
-- An original way to filter to start with
cands
=
setListType
(
Just
CandidateTerm
)
$
{- monoScoredInclTail
<>
monoScoredExclTail
<>
-}
multScoredInclTail
<>
multScoredExclTail
cands
=
setListType
(
Just
CandidateTerm
)
$
canMonoScoredIncHead
<>
canMonoScoredExclHead
<>
canMulScoredInclHead
<>
canMultScoredExclHead
-- TODO count it too
cands'
=
setListType
(
Just
CandidateTerm
)
{-$ groupedMonoTail
<>-}
groupedMultTail
...
...
@@ -303,6 +312,4 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
)]
]
-- printDebug "result" result
pure
result
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment