Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3f8c2b19
Commit
3f8c2b19
authored
Apr 18, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CODE/READ] with NP.
parent
a1f70708
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
48 additions
and
33 deletions
+48
-33
package.yaml
package.yaml
+1
-0
Ngrams.hs
src/Gargantext/Ngrams.hs
+15
-16
CoreNLP.hs
src/Gargantext/Ngrams/CoreNLP.hs
+1
-14
FrequentItemSet.hs
src/Gargantext/Ngrams/FrequentItemSet.hs
+1
-0
List.hs
src/Gargantext/Ngrams/List.hs
+16
-0
Parsers.hs
src/Gargantext/Parsers.hs
+8
-1
Prelude.hs
src/Gargantext/Prelude.hs
+4
-0
Prefix.hs
src/Gargantext/Utils/Prefix.hs
+2
-2
No files found.
package.yaml
View file @
3f8c2b19
...
...
@@ -115,6 +115,7 @@ library:
-
servant-static-th
-
split
-
stemmer
-
string-conversions
-
swagger2
-
tagsoup
-
text-metrics
...
...
src/Gargantext/Ngrams.hs
View file @
3f8c2b19
...
...
@@ -22,7 +22,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
,
Ngrams
(
..
),
ngrams
,
occ
,
sumOcc
,
text2fis
,
NgramsList
(
..
)
,
ListName
(
..
),
equivNgrams
,
isGram
--, module Gargantext.Ngrams.Words
)
where
...
...
@@ -40,7 +40,7 @@ import qualified Gargantext.Ngrams.FrequentItemSet as FIS
-----------------------------------------------------------------
import
Data.List
(
sort
)
import
Data.Char
(
Char
,
isAlpha
,
isSpace
)
import
Data.Char
(
Char
,
isAlpha
Num
,
isSpace
)
import
Data.Text
(
Text
,
words
,
filter
,
toLower
)
import
Data.Map.Strict
(
Map
,
empty
...
...
@@ -58,32 +58,31 @@ import Gargantext.Prelude hiding (filter)
--import Language.Aspell.Options (ACOption(..))
data
NgramsList
=
Stop
|
Candidate
|
Graph
data
ListName
=
Stop
|
Candidate
|
Graph
deriving
(
Show
,
Eq
)
data
Ngrams
=
Ngrams
{
_ngramsNgrams
::
[
Text
]
,
_ngramsStem
::
[
Text
]
,
_ngramsList
::
Maybe
NgramsList
data
Ngrams
=
Ngrams
{
_ngramsNgrams
::
[
Text
]
,
_ngramsStem
::
[
Text
]
,
_ngramsList
Name
::
Maybe
ListName
}
deriving
(
Show
)
instance
Eq
Ngrams
where
Ngrams
n1
s1
_
==
Ngrams
n2
s2
_
=
(
sort
n1
)
==
(
sort
n2
)
||
(
sort
s1
)
==
(
sort
s2
)
-- | Decide whether two 'Ngrams' are equivalent.
--
-- Two values are equivalent when their '_ngramsNgrams' fields are equal
-- once sorted, or when their '_ngramsStem' fields are equal once sorted.
-- The third field (the list name) is ignored by the comparison.
equivNgrams :: Ngrams -> Ngrams -> Bool
equivNgrams (Ngrams n1 s1 _) (Ngrams n2 s2 _) = sameNgrams || sameStems
  where
    -- Order-insensitive comparison of the raw ngrams.
    sameNgrams = sort n1 == sort n2
    -- Order-insensitive comparison of the stems; only evaluated when
    -- the ngrams differ, because '||' short-circuits.
    sameStems  = sort s1 == sort s2
type
Occ
=
Int
--type Index = Int
-- Data Ngrams = Monograms | MultiGrams
ngrams
::
Text
->
[
Text
]
ngrams
xs
=
monograms
$
toLower
$
filter
is
Char
xs
ngrams
xs
=
monograms
$
toLower
$
filter
is
Gram
xs
monograms
::
Text
->
[
Text
]
monograms
=
words
-- TODO
-- 12-b
isChar
::
Char
->
Bool
isChar
'-'
=
True
isChar
'/'
=
True
isChar
c
=
isAlpha
c
||
isSpace
c
-- | Predicate selecting the characters kept when building ngrams:
-- the hyphen @\'-\'@, the slash @\'/\'@, any alphanumeric character
-- and any whitespace character.
isGram :: Char -> Bool
isGram '-' = True
isGram '/' = True
isGram c   = isAlphaNum c || isSpace c
-- | Compute the occurrences (occ)
occ
::
Ord
a
=>
[
a
]
->
Map
a
Occ
...
...
@@ -91,7 +90,7 @@ occ xs = foldl' (\x y -> insertWith (+) y 1 x) empty xs
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Map
a
Occ
]
->
Map
a
Occ
sumOcc
xs
=
foldl'
(
\
x
y
->
unionWith
(
+
)
x
y
)
empty
xs
sumOcc
xs
=
foldl'
(
unionWith
(
+
)
)
empty
xs
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
...
...
src/Gargantext/Ngrams/CoreNLP.hs
View file @
3f8c2b19
...
...
@@ -28,8 +28,6 @@ import Gargantext.Prelude
import
Gargantext.Utils.Prefix
(
unPrefix
)
import
Data.Text
(
Text
)
import
qualified
Data.ByteString.Char8
as
S8
import
qualified
Data.Yaml
as
Yaml
import
Network.HTTP.Simple
...
...
@@ -82,17 +80,6 @@ $(deriveJSON (unPrefix "_") ''Sentences)
--
corenlpPretty
::
Text
->
IO
()
corenlpPretty
txt
=
do
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8
.
putStrLn
$
Yaml
.
encode
(
getResponseBody
response
::
Sentences
)
corenlp
::
Language
->
Text
->
IO
Sentences
corenlp
lang
txt
=
do
let
properties
=
case
lang
of
...
...
@@ -100,7 +87,7 @@ corenlp lang txt = do
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBody
JSON
txt
url
let
request
=
setRequestBody
LBS
(
cs
txt
)
url
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
...
...
src/Gargantext/Ngrams/FrequentItemSet.hs
View file @
3f8c2b19
...
...
@@ -28,6 +28,7 @@ import Gargantext.Prelude
type
Size
=
Either
Int
(
Int
,
Int
)
--data Size = Point | Segment
------------------------------------------------------------------------
-- | An occurrence is a Frequent Item Set of size 1
...
...
src/Gargantext/Ngrams/List.hs
0 → 100644
View file @
3f8c2b19
module
Gargantext.Ngrams.List
where
import
Data.Maybe
import
Data.List
(
filter
)
import
Gargantext.Ngrams
import
Gargantext.Prelude
-- | Keep only the 'Ngrams' whose '_ngramsListName' is @Just Graph@.
graph :: [Ngrams] -> [Ngrams]
graph = filter inGraphList
  where
    inGraphList ng = _ngramsListName ng == Just Graph
-- | Keep only the 'Ngrams' whose '_ngramsListName' is @Just Candidate@.
candidates :: [Ngrams] -> [Ngrams]
candidates = filter inCandidateList
  where
    inCandidateList ng = _ngramsListName ng == Just Candidate
-- | Keep only the 'Ngrams' whose '_ngramsListName' is @Just Stop@.
stop :: [Ngrams] -> [Ngrams]
stop = filter inStopList
  where
    inStopList ng = _ngramsListName ng == Just Stop
src/Gargantext/Parsers.hs
View file @
3f8c2b19
...
...
@@ -52,6 +52,14 @@ import Gargantext.Parsers.WOS (wosParser)
--import Gargantext.Prelude (pm)
--import Gargantext.Types.Main (ErrorMessage(), Corpus)
-- FIXME
--type Field = Text
type
ParseError
=
String
--
--data Corpus = Corpus { _corpusErrors :: [ParseError]
-- , _corpusMap :: Map FilePath (Map Field Text)
-- }
-- | Depending on the format of the input file,
-- different parsers are available.
...
...
@@ -63,7 +71,6 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
-- TODO: to ease debugging, maybe add the file path to the error message
type
ParseError
=
String
parse
::
FileFormat
->
FilePath
->
IO
([
ParseError
],
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Prelude.hs
View file @
3f8c2b19
...
...
@@ -12,6 +12,7 @@ module Gargantext.Prelude
,
headMay
,
module
Text
.
Show
,
module
Text
.
Read
,
cs
)
where
...
...
@@ -30,6 +31,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer
,
Eq
,
(
==
),
(
>=
),
(
<=
),
(
<>
)
,
(
&&
),
(
||
),
not
,
fst
,
snd
,
toS
,
elem
)
-- TODO import functions optimized in Utils.Count
...
...
@@ -43,6 +45,8 @@ import qualified Data.Vector as V
import
Safe
(
headMay
)
import
Text.Show
(
Show
(),
show
)
import
Text.Read
(
Read
())
import
Data.String.Conversions
(
cs
)
--pf :: (a -> Bool) -> [a] -> [a]
--pf = filter
...
...
src/Gargantext/Utils/Prefix.hs
View file @
3f8c2b19
...
...
@@ -21,8 +21,8 @@ unPrefix prefix = defaultOptions
-- | Lower case leading character
unCapitalize
::
String
->
String
unCapitalize
[]
=
[]
--
unCapitalize (c:cs) = toLower c : cs
unCapitalize
cs
=
map
toLower
cs
unCapitalize
(
c
:
cs
)
=
toLower
c
:
cs
--
unCapitalize cs = map toLower cs
-- | Remove given prefix
dropPrefix
::
String
->
String
->
String
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment