Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
c3728bf6
Unverified
Commit
c3728bf6
authored
6 years ago
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix splitting and show progress
parent
f33b0c12
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
8 deletions
+27
-8
Main.hs
bin/gargantext-cli/Main.hs
+17
-3
CSV.hs
src/Gargantext/Text/List/CSV.hs
+2
-1
Mono.hs
src/Gargantext/Text/Terms/Mono.hs
+1
-2
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+7
-2
No files found.
bin/gargantext-cli/Main.hs
View file @
c3728bf6
...
...
@@ -24,7 +24,11 @@ module Main where
import
qualified
Data.Vector
as
DV
import
Control.Monad
(
zipWithM
)
import
Control.Monad.IO.Class
import
Data.Text
(
Text
)
import
Data.List
(
cycle
)
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.Environment
--import Control.Concurrent.Async as CCA (mapConcurrently)
...
...
@@ -37,6 +41,17 @@ import Gargantext.Text.List.CSV (csvGraphTermList)
import
Gargantext.Text.Terms
(
terms
)
import
Gargantext.Text.Metrics.Count
(
cooc
)
mapMP
::
MonadIO
m
=>
(
a
->
m
b
)
->
[
a
]
->
m
[
b
]
mapMP
f
xs
=
do
bs
<-
zipWithM
g
(
cycle
"-
\\
|/"
)
xs
liftIO
$
hPutStr
stderr
"
\r
Done
\n
"
pure
bs
where
g
c
x
=
do
liftIO
$
hPutStr
stderr
[
'
\r
'
,
c
]
liftIO
$
hFlush
stderr
f
x
main
::
IO
()
main
=
do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
...
...
@@ -53,9 +68,8 @@ main = do
putStrLn
$
show
$
length
termList
let
patterns
=
WithList
$
buildPatterns
termList
corpusIndexed
<-
mapM
(
terms
patterns
)
corpus
putStrLn
$
show
corpusIndexed
corpusIndexed
<-
mapMP
(
terms
patterns
)
corpus
mapM
(
putStrLn
.
show
)
corpusIndexed
let
myCooc
=
cooc
corpusIndexed
putStrLn
$
show
myCooc
...
...
This diff is collapsed.
Click to expand it.
src/Gargantext/Text/List/CSV.hs
View file @
c3728bf6
...
...
@@ -25,6 +25,7 @@ import Control.Monad (mzero)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.List
(
null
)
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.Text
as
DT
import
qualified
Data.ByteString.Lazy
as
BL
...
...
@@ -42,7 +43,7 @@ csvGraphTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
csv2list
::
CsvListType
->
Vector
CsvList
->
TermList
csv2list
lt
vs
=
V
.
toList
$
V
.
map
(
\
(
CsvList
_
label
forms
)
->
(
DT
.
words
label
,
map
DT
.
words
$
DT
.
splitOn
csvListFormsDelimiter
forms
))
->
(
DT
.
words
label
,
filter
(
not
.
null
)
.
map
DT
.
words
$
DT
.
splitOn
csvListFormsDelimiter
forms
))
$
V
.
filter
(
\
l
->
csvList_status
l
==
lt
)
vs
------------------------------------------------------------------------
...
...
This diff is collapsed.
Click to expand it.
src/Gargantext/Text/Terms/Mono.hs
View file @
c3728bf6
...
...
@@ -17,7 +17,6 @@ module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence)
where
import
Prelude
(
String
)
import
Data.Char
(
isSpace
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
...
...
@@ -50,7 +49,7 @@ monoText2term :: Lang -> Text -> Terms
monoText2term
lang
txt
=
Terms
[
txt
]
(
S
.
singleton
$
stem
lang
txt
)
monoTextsBySentence
::
Text
->
[[
Text
]]
monoTextsBySentence
=
map
(
T
.
split
isSpace
)
monoTextsBySentence
=
map
T
.
words
.
T
.
split
isSep
.
T
.
toLower
...
...
This diff is collapsed.
Click to expand it.
src/Gargantext/Text/Terms/WithList.hs
View file @
c3728bf6
...
...
@@ -12,6 +12,7 @@ commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module
Gargantext.Text.Terms.WithList
where
...
...
@@ -24,8 +25,9 @@ import Gargantext.Core.Types (Terms(..))
import
Gargantext.Text.Context
import
Gargantext.Text.Terms.Mono
(
monoTextsBySentence
)
import
Prelude
(
error
)
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
import
Data.List
(
null
,
concatMap
)
import
Data.Ord
import
qualified
Data.Set
as
Set
...
...
@@ -67,7 +69,10 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
where
buildPattern
(
label
,
alts
)
=
map
f
(
label
:
alts
)
where
f
alt
=
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
f
alt
|
""
`
elem
`
alt
=
error
"buildPatterns: ERR1"
|
null
alt
=
error
"buildPatterns: ERR2"
|
otherwise
=
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
(
Terms
label
$
Set
.
empty
)
-- TODO check stems
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Terms
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment