Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
3f8c2b19
Commit
3f8c2b19
authored
Apr 18, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CODE/READ] with NP.
parent
a1f70708
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
48 additions
and
33 deletions
+48
-33
package.yaml
package.yaml
+1
-0
Ngrams.hs
src/Gargantext/Ngrams.hs
+15
-16
CoreNLP.hs
src/Gargantext/Ngrams/CoreNLP.hs
+1
-14
FrequentItemSet.hs
src/Gargantext/Ngrams/FrequentItemSet.hs
+1
-0
List.hs
src/Gargantext/Ngrams/List.hs
+16
-0
Parsers.hs
src/Gargantext/Parsers.hs
+8
-1
Prelude.hs
src/Gargantext/Prelude.hs
+4
-0
Prefix.hs
src/Gargantext/Utils/Prefix.hs
+2
-2
No files found.
package.yaml
View file @
3f8c2b19
...
...
@@ -115,6 +115,7 @@ library:
-
servant-static-th
-
split
-
stemmer
-
string-conversions
-
swagger2
-
tagsoup
-
text-metrics
...
...
src/Gargantext/Ngrams.hs
View file @
3f8c2b19
...
...
@@ -22,7 +22,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
,
module
Gargantext
.
Ngrams
.
TextMining
,
module
Gargantext
.
Ngrams
.
Metrics
,
Ngrams
(
..
),
ngrams
,
occ
,
sumOcc
,
text2fis
,
NgramsList
(
..
)
,
ListName
(
..
),
equivNgrams
,
isGram
--, module Gargantext.Ngrams.Words
)
where
...
...
@@ -40,7 +40,7 @@ import qualified Gargantext.Ngrams.FrequentItemSet as FIS
-----------------------------------------------------------------
import
Data.List
(
sort
)
import
Data.Char
(
Char
,
isAlpha
,
isSpace
)
import
Data.Char
(
Char
,
isAlpha
Num
,
isSpace
)
import
Data.Text
(
Text
,
words
,
filter
,
toLower
)
import
Data.Map.Strict
(
Map
,
empty
...
...
@@ -58,32 +58,31 @@ import Gargantext.Prelude hiding (filter)
--import Language.Aspell.Options (ACOption(..))
data
NgramsList
=
Stop
|
Candidate
|
Graph
data
ListName
=
Stop
|
Candidate
|
Graph
deriving
(
Show
,
Eq
)
data
Ngrams
=
Ngrams
{
_ngramsNgrams
::
[
Text
]
,
_ngramsStem
::
[
Text
]
,
_ngramsList
::
Maybe
NgramsList
data
Ngrams
=
Ngrams
{
_ngramsNgrams
::
[
Text
]
,
_ngramsStem
::
[
Text
]
,
_ngramsList
Name
::
Maybe
ListName
}
deriving
(
Show
)
instance
Eq
Ngrams
where
Ngrams
n1
s1
_
==
Ngrams
n2
s2
_
=
(
sort
n1
)
==
(
sort
n2
)
||
(
sort
s1
)
==
(
sort
s2
)
equivNgrams
::
Ngrams
->
Ngrams
->
Bool
equivNgrams
(
Ngrams
n1
s1
_
)
(
Ngrams
n2
s2
_
)
=
(
sort
n1
)
==
(
sort
n2
)
||
(
sort
s1
)
==
(
sort
s2
)
type
Occ
=
Int
--type Index = Int
-- Data Ngrams = Monograms | MultiGrams
ngrams
::
Text
->
[
Text
]
ngrams
xs
=
monograms
$
toLower
$
filter
is
Char
xs
ngrams
xs
=
monograms
$
toLower
$
filter
is
Gram
xs
monograms
::
Text
->
[
Text
]
monograms
=
words
-- TODO
-- 12-b
isChar
::
Char
->
Bool
isChar
'-'
=
True
isChar
'/'
=
True
isChar
c
=
isAlpha
c
||
isSpace
c
isGram
::
Char
->
Bool
isGram
c
=
isAlphaNum
c
||
isSpace
c
||
c
`
elem
`
[
'-'
,
'/'
]
-- | Compute the occurrences (occ)
occ
::
Ord
a
=>
[
a
]
->
Map
a
Occ
...
...
@@ -91,7 +90,7 @@ occ xs = foldl' (\x y -> insertWith (+) y 1 x) empty xs
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Map
a
Occ
]
->
Map
a
Occ
sumOcc
xs
=
foldl'
(
\
x
y
->
unionWith
(
+
)
x
y
)
empty
xs
sumOcc
xs
=
foldl'
(
unionWith
(
+
)
)
empty
xs
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
...
...
src/Gargantext/Ngrams/CoreNLP.hs
View file @
3f8c2b19
...
...
@@ -28,8 +28,6 @@ import Gargantext.Prelude
import
Gargantext.Utils.Prefix
(
unPrefix
)
import
Data.Text
(
Text
)
import
qualified
Data.ByteString.Char8
as
S8
import
qualified
Data.Yaml
as
Yaml
import
Network.HTTP.Simple
...
...
@@ -82,17 +80,6 @@ $(deriveJSON (unPrefix "_") ''Sentences)
--
corenlpPretty
::
Text
->
IO
()
corenlpPretty
txt
=
do
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8
.
putStrLn
$
Yaml
.
encode
(
getResponseBody
response
::
Sentences
)
corenlp
::
Language
->
Text
->
IO
Sentences
corenlp
lang
txt
=
do
let
properties
=
case
lang
of
...
...
@@ -100,7 +87,7 @@ corenlp lang txt = do
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBody
JSON
txt
url
let
request
=
setRequestBody
LBS
(
cs
txt
)
url
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
...
...
src/Gargantext/Ngrams/FrequentItemSet.hs
View file @
3f8c2b19
...
...
@@ -28,6 +28,7 @@ import Gargantext.Prelude
type
Size
=
Either
Int
(
Int
,
Int
)
--data Size = Point | Segment
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
...
...
src/Gargantext/Ngrams/List.hs
0 → 100644
View file @
3f8c2b19
module
Gargantext.Ngrams.List
where
import
Data.Maybe
import
Data.List
(
filter
)
import
Gargantext.Ngrams
import
Gargantext.Prelude
graph
::
[
Ngrams
]
->
[
Ngrams
]
graph
ngs
=
filter
(
\
ng
->
_ngramsListName
ng
==
Just
Graph
)
ngs
candidates
::
[
Ngrams
]
->
[
Ngrams
]
candidates
ngs
=
filter
(
\
ng
->
_ngramsListName
ng
==
Just
Candidate
)
ngs
stop
::
[
Ngrams
]
->
[
Ngrams
]
stop
ngs
=
filter
(
\
ng
->
_ngramsListName
ng
==
Just
Stop
)
ngs
src/Gargantext/Parsers.hs
View file @
3f8c2b19
...
...
@@ -52,6 +52,14 @@ import Gargantext.Parsers.WOS (wosParser)
--import Gargantext.Prelude (pm)
--import Gargantext.Types.Main (ErrorMessage(), Corpus)
-- FIXME
--type Field = Text
type
ParseError
=
String
--
--data Corpus = Corpus { _corpusErrors :: [ParseError]
-- , _corpusMap :: Map FilePath (Map Field Text)
-- }
-- | According to the format of Input file,
-- different parser are available.
...
...
@@ -63,7 +71,6 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
-- TODO: to debug maybe add the filepath in error message
type
ParseError
=
String
parse
::
FileFormat
->
FilePath
->
IO
([
ParseError
],
[[(
Text
,
Text
)]])
...
...
src/Gargantext/Prelude.hs
View file @
3f8c2b19
...
...
@@ -12,6 +12,7 @@ module Gargantext.Prelude
,
headMay
,
module
Text
.
Show
,
module
Text
.
Read
,
cs
)
where
...
...
@@ -30,6 +31,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer
,
Eq
,
(
==
),
(
>=
),
(
<=
),
(
<>
)
,
(
&&
),
(
||
),
not
,
fst
,
snd
,
toS
,
elem
)
-- TODO import functions optimized in Utils.Count
...
...
@@ -43,6 +45,8 @@ import qualified Data.Vector as V
import
Safe
(
headMay
)
import
Text.Show
(
Show
(),
show
)
import
Text.Read
(
Read
())
import
Data.String.Conversions
(
cs
)
--pf :: (a -> Bool) -> [a] -> [a]
--pf = filter
...
...
src/Gargantext/Utils/Prefix.hs
View file @
3f8c2b19
...
...
@@ -21,8 +21,8 @@ unPrefix prefix = defaultOptions
-- | Lower case leading character
unCapitalize
::
String
->
String
unCapitalize
[]
=
[]
--
unCapitalize (c:cs) = toLower c : cs
unCapitalize
cs
=
map
toLower
cs
unCapitalize
(
c
:
cs
)
=
toLower
c
:
cs
--
unCapitalize cs = map toLower cs
-- | Remove given prefix
dropPrefix
::
String
->
String
->
String
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment