Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
c3728bf6
Unverified
Commit
c3728bf6
authored
Jul 03, 2018
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix splitting and show progress
parent
f33b0c12
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
27 additions
and
8 deletions
+27
-8
Main.hs
bin/gargantext-cli/Main.hs
+17
-3
CSV.hs
src/Gargantext/Text/List/CSV.hs
+2
-1
Mono.hs
src/Gargantext/Text/Terms/Mono.hs
+1
-2
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+7
-2
No files found.
bin/gargantext-cli/Main.hs
View file @
c3728bf6
...
...
@@ -24,7 +24,11 @@ module Main where
import
qualified
Data.Vector
as
DV
import
Control.Monad
(
zipWithM
)
import
Control.Monad.IO.Class
import
Data.Text
(
Text
)
import
Data.List
(
cycle
)
import
System.IO
(
hPutStr
,
hFlush
,
stderr
)
import
System.Environment
--import Control.Concurrent.Async as CCA (mapConcurrently)
...
...
@@ -37,6 +41,17 @@ import Gargantext.Text.List.CSV (csvGraphTermList)
import
Gargantext.Text.Terms
(
terms
)
import
Gargantext.Text.Metrics.Count
(
cooc
)
mapMP
::
MonadIO
m
=>
(
a
->
m
b
)
->
[
a
]
->
m
[
b
]
mapMP
f
xs
=
do
bs
<-
zipWithM
g
(
cycle
"-
\\
|/"
)
xs
liftIO
$
hPutStr
stderr
"
\r
Done
\n
"
pure
bs
where
g
c
x
=
do
liftIO
$
hPutStr
stderr
[
'
\r
'
,
c
]
liftIO
$
hFlush
stderr
f
x
main
::
IO
()
main
=
do
[
corpusFile
,
termListFile
,
outputFile
]
<-
getArgs
...
...
@@ -53,9 +68,8 @@ main = do
putStrLn
$
show
$
length
termList
let
patterns
=
WithList
$
buildPatterns
termList
corpusIndexed
<-
mapM
(
terms
patterns
)
corpus
putStrLn
$
show
corpusIndexed
corpusIndexed
<-
mapMP
(
terms
patterns
)
corpus
mapM
(
putStrLn
.
show
)
corpusIndexed
let
myCooc
=
cooc
corpusIndexed
putStrLn
$
show
myCooc
...
...
src/Gargantext/Text/List/CSV.hs
View file @
c3728bf6
...
...
@@ -25,6 +25,7 @@ import Control.Monad (mzero)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.List
(
null
)
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.Text
as
DT
import
qualified
Data.ByteString.Lazy
as
BL
...
...
@@ -42,7 +43,7 @@ csvGraphTermList fp = csv2list CsvMap <$> snd <$> fromCsvListFile fp
csv2list
::
CsvListType
->
Vector
CsvList
->
TermList
csv2list
lt
vs
=
V
.
toList
$
V
.
map
(
\
(
CsvList
_
label
forms
)
->
(
DT
.
words
label
,
map
DT
.
words
$
DT
.
splitOn
csvListFormsDelimiter
forms
))
->
(
DT
.
words
label
,
filter
(
not
.
null
)
.
map
DT
.
words
$
DT
.
splitOn
csvListFormsDelimiter
forms
))
$
V
.
filter
(
\
l
->
csvList_status
l
==
lt
)
vs
------------------------------------------------------------------------
...
...
src/Gargantext/Text/Terms/Mono.hs
View file @
c3728bf6
...
...
@@ -17,7 +17,6 @@ module Gargantext.Text.Terms.Mono (monoTerms, monoTexts, monoTextsBySentence)
where
import
Prelude
(
String
)
import
Data.Char
(
isSpace
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
...
...
@@ -50,7 +49,7 @@ monoText2term :: Lang -> Text -> Terms
monoText2term
lang
txt
=
Terms
[
txt
]
(
S
.
singleton
$
stem
lang
txt
)
monoTextsBySentence
::
Text
->
[[
Text
]]
monoTextsBySentence
=
map
(
T
.
split
isSpace
)
monoTextsBySentence
=
map
T
.
words
.
T
.
split
isSep
.
T
.
toLower
...
...
src/Gargantext/Text/Terms/WithList.hs
View file @
c3728bf6
...
...
@@ -12,6 +12,7 @@ commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module
Gargantext.Text.Terms.WithList
where
...
...
@@ -24,8 +25,9 @@ import Gargantext.Core.Types (Terms(..))
import
Gargantext.Text.Context
import
Gargantext.Text.Terms.Mono
(
monoTextsBySentence
)
import
Prelude
(
error
)
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
import
Data.List
(
null
,
concatMap
)
import
Data.Ord
import
qualified
Data.Set
as
Set
...
...
@@ -67,7 +69,10 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
where
buildPattern
(
label
,
alts
)
=
map
f
(
label
:
alts
)
where
f
alt
=
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
f
alt
|
""
`
elem
`
alt
=
error
"buildPatterns: ERR1"
|
null
alt
=
error
"buildPatterns: ERR2"
|
otherwise
=
Pattern
(
KMP
.
build
alt
)
(
length
alt
)
(
Terms
label
$
Set
.
empty
)
-- TODO check stems
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Terms
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment