Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
64f251bc
Commit
64f251bc
authored
May 27, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into dev-test
parents
55f89c13
67c34520
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
33 additions
and
26 deletions
+33
-26
Parsers.hs
src/Gargantext/Core/Text/Corpus/Parsers.hs
+5
-5
Learn.hs
src/Gargantext/Core/Text/Learn.hs
+1
-1
List.hs
src/Gargantext/Core/Text/List.hs
+27
-20
No files found.
src/Gargantext/Core/Text/Corpus/Parsers.hs
View file @
64f251bc
...
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import
Gargantext.Prelude
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
parseHal
,
parseHal'
,
parseCsv
,
parseCsv'
)
import
Gargantext.Core.Text.Corpus.Parsers.RIS.Presse
(
presseEnrich
)
import
Gargantext.Core.Text.Learn
(
detectLangDefault
)
--
import Gargantext.Core.Text.Learn (detectLangDefault)
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.ByteString.Char8
as
DBC
...
...
@@ -103,11 +103,11 @@ parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff
toDoc
::
FileFormat
->
[(
Text
,
Text
)]
->
IO
HyperdataDocument
-- TODO use language for RIS
toDoc
ff
d
=
do
let
abstract
=
lookup
"abstract"
d
let
lang
=
maybe
EN
identity
(
join
$
detectLangDefault
<$>
(
fmap
(
DT
.
take
50
)
abstract
))
--
let abstract = lookup "abstract" d
let
lang
=
EN
--
maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let
dateToParse
=
DT
.
replace
"-"
" "
<$>
lookup
"PY"
d
<>
Just
" "
<>
lookup
"publication_date"
d
(
utcTime
,
(
pub_year
,
pub_month
,
pub_day
))
<-
Date
.
dateSplit
lang
dateToParse
pure
$
HyperdataDocument
(
Just
$
DT
.
pack
$
show
ff
)
...
...
src/Gargantext/Core/Text/Learn.hs
View file @
64f251bc
...
...
@@ -112,7 +112,7 @@ detectLangDefault = detectCat 99 eventLang
textSample
::
Lang
->
String
textSample
EN
=
EN
.
textSample
textSample
FR
=
FR
.
textSample
textSample
_
=
panic
"
textSample:
not impl yet"
textSample
_
=
panic
"
[G.C.T.L:detectLangDefault]
not impl yet"
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
...
...
src/Gargantext/Core/Text/List.hs
View file @
64f251bc
...
...
@@ -256,41 +256,50 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
-- splitAt
let
-- use % of list if to big, or Int if to small
listSizeLocal
=
1000
::
Double
mapSize
=
1000
::
Double
canSize
=
mapSize
*
10
::
Double
inclSize
=
0.4
::
Double
exclSize
=
1
-
inclSize
splitAt'
n'
=
(
both
(
HashMap
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
listSizeLocal
))
splitAt'
max'
n'
=
(
both
(
HashMap
.
fromList
))
.
(
List
.
splitAt
(
round
$
n'
*
max'
))
sortOn
f
=
(
List
.
sortOn
(
Down
.
(
view
(
gts'_score
.
f
))
.
snd
))
.
HashMap
.
toList
monoInc_size
n
=
splitAt'
n
$
monoSize
*
inclSize
/
2
multExc_size
n
=
splitAt'
n
$
multSize
*
exclSize
/
2
(
mapMonoScoredInclHead
,
monoScoredInclTail
)
=
monoInc_size
mapSize
$
(
sortOn
scored_genInc
)
monoScoredIncl
(
mapMonoScoredExclHead
,
monoScoredExclTail
)
=
monoInc_size
mapSize
$
(
sortOn
scored_speExc
)
monoScoredExcl
monoInc_size
=
splitAt'
$
monoSize
*
inclSize
/
2
(
monoScoredInclHead
,
_monoScoredInclTail
)
=
monoInc_size
$
(
sortOn
scored_genInc
)
monoScoredIncl
(
monoScoredExclHead
,
_monoScoredExclTail
)
=
monoInc_size
$
(
sortOn
scored_speExc
)
monoScoredExcl
(
mapMultScoredInclHead
,
multScoredInclTail
)
=
multExc_size
mapSize
$
(
sortOn
scored_genInc
)
multScoredIncl
(
mapMultScoredExclHead
,
multScoredExclTail
)
=
multExc_size
mapSize
$
(
sortOn
scored_speExc
)
multScoredExcl
multExc_size
=
splitAt'
$
multSize
*
exclSize
/
2
(
multScoredInclHead
,
multScoredInclTail
)
=
multExc_size
$
(
sortOn
scored_genInc
)
multScoredIncl
(
multScoredExclHead
,
multScoredExclTail
)
=
multExc_size
$
(
sortOn
scored_speExc
)
multScoredExcl
printDebug
"stopWords"
stopTerms
(
canMonoScoredIncHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn
scored_genInc
)
monoScoredInclTail
(
canMonoScoredExclHead
,
_
)
=
monoInc_size
canSize
$
(
sortOn
scored_speExc
)
monoScoredExclTail
(
canMulScoredInclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn
scored_genInc
)
multScoredInclTail
(
canMultScoredExclHead
,
_
)
=
multExc_size
canSize
$
(
sortOn
scored_speExc
)
multScoredExclTail
------------------------------------------------------------
-- Final Step building the Typed list
-- Candidates Terms need to be filtered
let
maps
=
setListType
(
Just
MapTerm
)
$
monoScoredInclHead
<>
monoScoredExclHead
<>
multScoredInclHead
<>
multScoredExclHead
$
m
apM
onoScoredInclHead
<>
m
apM
onoScoredExclHead
<>
m
apM
ultScoredInclHead
<>
m
apM
ultScoredExclHead
-- An original way to filter to start with
cands
=
setListType
(
Just
CandidateTerm
)
$
{- monoScoredInclTail
<>
monoScoredExclTail
<>
-}
multScoredInclTail
<>
multScoredExclTail
cands
=
setListType
(
Just
CandidateTerm
)
$
canMonoScoredIncHead
<>
canMonoScoredExclHead
<>
canMulScoredInclHead
<>
canMultScoredExclHead
-- TODO count it too
cands'
=
setListType
(
Just
CandidateTerm
)
{-$ groupedMonoTail
<>-}
groupedMultTail
...
...
@@ -303,6 +312,4 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
)]
]
-- printDebug "result" result
pure
result
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment