Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
787cd46e
Commit
787cd46e
authored
Oct 31, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Adding WOS parser, NLP functions, Rights management guidelines (draft).
parent
a72e061f
Changes
15
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
277 additions
and
7 deletions
+277
-7
gargantext.cabal
gargantext.cabal
+9
-1
foldFinal.hs
notes/foldFinal.hs
+25
-0
folds.hs
notes/folds.hs
+56
-0
Analysis.hs
src/Data/Gargantext/Analysis.hs
+15
-0
API.hs
src/Data/Gargantext/Internal/API.hs
+9
-0
CoreNLP.hs
src/Data/Gargantext/NLP/CoreNLP.hs
+2
-2
Network.hs
src/Data/Gargantext/Network.hs
+36
-0
Occurrences.hs
src/Data/Gargantext/Ngrams/Occurrences.hs
+1
-1
Parsers.hs
src/Data/Gargantext/Parsers.hs
+2
-2
CSV.hs
src/Data/Gargantext/Parsers/CSV.hs
+0
-0
RIS.hs
src/Data/Gargantext/Parsers/RIS.hs
+0
-0
Utils.hs
src/Data/Gargantext/Parsers/Utils.hs
+8
-0
WOS.hs
src/Data/Gargantext/Parsers/WOS.hs
+112
-0
XML.hs
src/Data/Gargantext/Parsers/XML.hs
+1
-0
stack.yaml
stack.yaml
+1
-1
No files found.
gargantext.cabal
View file @
787cd46e
...
@@ -18,17 +18,21 @@ library
...
@@ -18,17 +18,21 @@ library
build-depends: base >= 4.7 && < 5
build-depends: base >= 4.7 && < 5
, aeson
, aeson
, attoparsec
, attoparsec
, async
, base16-bytestring
, base16-bytestring
, bytestring
, bytestring
, case-insensitive
, case-insensitive
, containers
, containers
, contravariant
, contravariant
, conduit
, conduit-extra
, directory
, directory
, extra
, extra
, filepath
, filepath
, http-conduit
, http-conduit
, lens
, lens
, opaleye
, opaleye
, path
, postgresql-simple
, postgresql-simple
, pretty
, pretty
, product-profunctors
, product-profunctors
...
@@ -53,11 +57,15 @@ library
...
@@ -53,11 +57,15 @@ library
, warp
, warp
, yaml
, yaml
, zlib
, zlib
, zip
, path-io
--, LibZip
-- , stemmer
-- , stemmer
--, utc
--, utc
exposed-modules: Data.Gargantext
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.DSL
, Data.Gargantext.Ngrams.Occurrences
, Data.Gargantext.NLP
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Database
, Data.Gargantext.Database
...
@@ -71,7 +79,7 @@ library
...
@@ -71,7 +79,7 @@ library
, Data.Gargantext.Database.Private
, Data.Gargantext.Database.Private
, Data.Gargantext.Database.User
, Data.Gargantext.Database.User
, Data.Gargantext.Parsers
, Data.Gargantext.Parsers
, Data.Gargantext.Parsers.
Occurrences
, Data.Gargantext.Parsers.
WOS
, Data.Gargantext.Prelude
, Data.Gargantext.Prelude
, Data.Gargantext.Server
, Data.Gargantext.Server
, Data.Gargantext.Types
, Data.Gargantext.Types
...
...
notes/foldFinal.hs
0 → 100644
View file @
787cd46e
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
import
Control.Lens
(
Getting
,
foldMapOf
)
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
instance
Functor
(
Fold
i
)
where
fmap
k
(
Fold
tally
summarize
)
=
Fold
tally
(
k
.
summarize
)
instance
Applicative
(
Fold
i
)
where
pure
o
=
Fold
(
\
_
->
()
)
(
\
_
->
o
)
Fold
tallyF
summarizeF
<*>
Fold
tallyX
summarizeX
=
Fold
tally
summarize
where
tally
i
=
(
tallyF
i
,
tallyX
i
)
summarize
(
nF
,
nX
)
=
summarizeF
nF
(
summarizeX
nX
)
focus
::
(
forall
m
.
Monoid
m
=>
Getting
m
b
a
)
->
Fold
a
o
->
Fold
b
o
focus
lens
(
Fold
tally
summarize
)
=
Fold
(
foldMapOf
lens
tally
)
summarize
notes/folds.hs
0 → 100644
View file @
787cd46e
{-# LANGUAGE ExistentialQuantification #-}
-- | Thanks to Gabriel Gonzales and his beautiful folds
import
Data.Monoid
import
Prelude
hiding
(
head
,
last
,
all
,
any
,
sum
,
product
,
length
)
import
qualified
Data.Foldable
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
fold
::
Fold
i
o
->
[
i
]
->
o
fold
(
Fold
tally
summarize
)
is
=
summarize
(
reduce
(
map
tally
is
))
where
reduce
=
Data
.
Foldable
.
foldl'
(
<>
)
mempty
--
head
::
Fold
a
(
Maybe
a
)
head
=
Fold
(
First
.
Just
)
getFirst
last
::
Fold
a
(
Maybe
a
)
last
=
Fold
(
Last
.
Just
)
getLast
--
all
::
(
a
->
Bool
)
->
Fold
a
Bool
all
predicate
=
Fold
(
All
.
predicate
)
getAll
any
::
(
a
->
Bool
)
->
Fold
a
Bool
any
predicate
=
Fold
(
Any
.
predicate
)
getAny
--
sum
::
Num
n
=>
Fold
n
n
sum
=
Fold
Sum
getSum
product
::
Num
n
=>
Fold
n
n
product
=
Fold
Product
getProduct
length
::
Num
n
=>
Fold
i
n
length
=
Fold
(
\
_
->
Sum
1
)
getSum
--
{-# LANGUAGE BangPatterns #-}
data
Average
a
=
Average
{
numerator
::
!
a
,
denominator
::
!
Int
}
instance
Num
a
=>
Monoid
(
Average
a
)
where
mempty
=
Average
0
0
mappend
(
Average
xL
nL
)
(
Average
xR
nR
)
=
Average
(
xL
+
xR
)
(
nL
+
nR
)
average
::
Fractional
a
=>
Fold
a
a
average
=
Fold
tally
summarize
where
tally
x
=
Average
x
1
summarize
(
Average
numerator
denominator
)
=
numerator
/
fromIntegral
denominator
src/Data/Gargantext/Analysis.hs
View file @
787cd46e
...
@@ -3,6 +3,8 @@ module Data.Gargantext.Analysis where
...
@@ -3,6 +3,8 @@ module Data.Gargantext.Analysis where
-- import qualified Data.Text.Lazy as DTL
-- import qualified Data.Text.Lazy as DTL
import
Data.Text
import
Data.Text
import
Opaleye
(
Column
,
PGInt4
)
import
Opaleye
(
Column
,
PGInt4
)
--import Data.Map as DM
--import Data.Vector as DV
-- | Simple function to count Occurrences in a context of text.
-- | Simple function to count Occurrences in a context of text.
occOfDocument
::
Column
PGInt4
->
Text
->
IO
Int
occOfDocument
::
Column
PGInt4
->
Text
->
IO
Int
...
@@ -15,3 +17,16 @@ occOfDocument = undefined
...
@@ -15,3 +17,16 @@ occOfDocument = undefined
-- Left str -> error $ "[ERRROR] at file/function/line" ++ str
-- Left str -> error $ "[ERRROR] at file/function/line" ++ str
-- Right xs -> xs
-- Right xs -> xs
-- pure (sum result)
-- pure (sum result)
data
Occurrences
a
b
=
Map
a
b
src/Data/Gargantext/Internal/API.hs
0 → 100644
View file @
787cd46e
garg
garg
CorpusWith
makeAListOf
Corpus
ListType
src/Data/Gargantext/NLP/CoreNLP.hs
View file @
787cd46e
...
@@ -52,7 +52,7 @@ instance FromJSON Sentences
...
@@ -52,7 +52,7 @@ instance FromJSON Sentences
corenlpPretty
::
String
->
IO
()
corenlpPretty
::
String
->
IO
()
corenlpPretty
txt
=
do
corenlpPretty
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
response
<-
httpJSON
request
...
@@ -63,7 +63,7 @@ corenlpPretty txt = do
...
@@ -63,7 +63,7 @@ corenlpPretty txt = do
corenlp
::
String
->
IO
Sentences
corenlp
::
String
->
IO
Sentences
corenlp
txt
=
do
corenlp
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
pure
(
getResponseBody
response
::
Sentences
)
...
...
src/Data/Gargantext/Network.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Network
where
import
Data.Gargantext.Prelude
import
Data.Map
as
DM
import
Data.Vector
as
DV
type
Measure
a
b
c
=
DM
.
Map
a
(
DM
.
Map
b
c
)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type
GargVector
a
b
c
=
DM
.
Map
a
(
DM
.
Map
b
c
)
-- GargMatrix Granularity (Paire Paire) [Candle]
type
GargMatrix
a
b
c
d
=
DM
.
Map
a
(
FolioVector
b
c
d
)
-- GargMatrix Granularity (Paire Paire) [Candle]
type
GargTensor
a
b
c
d
e
=
DM
.
Map
a
(
FolioMatrix
b
c
d
e
)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure
::
Granularity
->
Paire
->
[
Candle
]
->
Measure
Granularity
Paire
Candle
toMeasure
g
c1
c2
cs
=
DM
.
fromList
[(
g
,
src/Data/Gargantext/
Parser
s/Occurrences.hs
→
src/Data/Gargantext/
Ngram
s/Occurrences.hs
View file @
787cd46e
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Data.Gargantext.
Parser
s.Occurrences
where
module
Data.Gargantext.
Ngram
s.Occurrences
where
import
Data.Attoparsec.Text
import
Data.Attoparsec.Text
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
...
...
src/Data/Gargantext/Parsers.hs
View file @
787cd46e
module
Data.Gargantext.Parsers
(
module
Data
.
Gargantext
.
Parsers
.
Occurrences
)
module
Data.Gargantext.Parsers
(
module
Data
.
Gargantext
.
Parsers
.
WOS
)
where
where
import
Data.Gargantext.Parsers.
Occurrences
import
Data.Gargantext.Parsers.
WOS
src/Data/Gargantext/Parsers/CSV.hs
0 → 100644
View file @
787cd46e
src/Data/Gargantext/Parsers/RIS.hs
0 → 100644
View file @
787cd46e
src/Data/Gargantext/Parsers/Utils.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Parsers.Utils
where
-- use Duckling here
parseDate
=
undefined
src/Data/Gargantext/Parsers/WOS.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Parsers.WOS
where
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
)
import
qualified
Data.List
as
DL
import
Data.Map
as
DM
import
Data.Attoparsec.ByteString
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
char8
,
endOfLine
,
isDigit_w8
,
isAlpha_ascii
,
isEndOfLine
)
import
Data.ByteString
(
ByteString
,
unpack
,
pack
,
concat
,
readFile
)
import
Data.Either.Extra
(
Either
(
..
))
import
Control.Applicative
import
Control.Monad
(
join
)
-- To be removed just for Tests
--
-- import Codec.Archive.LibZip (withArchive, fileNames, sourceFile, addFile)
--import Codec.Archive.LibZip.Types (ZipSource, OpenFlag (CreateFlag))
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
System.Environment
import
Codec.Archive.Zip
import
Path
(
parseAbsFile
)
import
Path.IO
(
resolveFile'
)
-- import qualified Data.ByteString.Lazy as B
import
Control.Applicative
(
(
<$>
)
)
zipFiles
::
FilePath
->
IO
[
ByteString
]
zipFiles
fp
=
do
path
<-
resolveFile'
fp
entries
<-
withArchive
path
(
DM
.
keys
<$>
getEntries
)
bs
<-
mapConcurrently
(
\
s
->
withArchive
path
(
getEntry
s
))
entries
pure
bs
parseFile
::
ParserType
->
ByteString
->
IO
Int
parseFile
p
x
=
case
runParser
p
x
of
Left
e
->
pure
1
Right
r
->
pure
$
length
r
testWos
::
FilePath
->
IO
[
Int
]
testWos
fp
=
join
$
mapConcurrently
(
parseFile
WOS
)
<$>
zipFiles
fp
-- type Parser a = a -> Text -> [Document]
data
ParserType
=
WOS
|
CSV
wosParser
::
Parser
[
Maybe
[
ByteString
]]
wosParser
=
do
-- TODO Warning if version /= 1.0
_
<-
manyTill
anyChar
(
string
"
\n
VR 1.0"
)
ns
<-
many1
wosNotice
<*
"
\n
EF"
return
ns
startNotice
::
Parser
ByteString
startNotice
=
"
\n
PT "
*>
takeTill
isEndOfLine
wosNotice
::
Parser
(
Maybe
[
ByteString
])
wosNotice
=
do
n
<-
startNotice
*>
wosFields
<*
manyTill
anyChar
(
string
"
\n
ER
\n
"
)
return
n
field'
::
Parser
(
ByteString
,
[
ByteString
])
field'
=
do
f
<-
"
\n
"
*>
take
2
<*
" "
a
<-
takeTill
isEndOfLine
as
<-
try
wosLines
let
as'
=
case
DL
.
length
as
>
0
of
True
->
as
False
->
[]
return
(
f
,
[
a
]
++
as'
)
wosFields'
::
Parser
[(
ByteString
,
[
ByteString
])]
wosFields'
=
many
field'
wosFields
::
Parser
(
Maybe
[
ByteString
])
wosFields
=
do
-- a <- field "AU"
-- t <- field "TI"
-- s <- field "SO"
-- d <- field "DI" -- DOI
-- p <- field "PD"
-- b <- field "AB"
-- u <- field "UT"
ws
<-
many
field'
return
$
DL
.
lookup
"UT"
ws
-- return $ HyperdataDocument
-- Just "WOS"
-- DL.lookup "DI" ws
-- DL.lookup "URL" ws
-- DL.lookup "PA" ws
-- DL.lookup "TI" ws
--
wosLines
::
Parser
[
ByteString
]
wosLines
=
many
line
where
line
::
Parser
ByteString
line
=
"
\n
"
*>
takeTill
isEndOfLine
runParser
::
ParserType
->
ByteString
->
Either
String
[
Maybe
[
ByteString
]]
runParser
p
x
=
parseOnly
parser
x
where
parser
=
case
p
of
WOS
->
wosParser
_
->
error
"Not implemented yet"
-- isTokenChar :: Word8 -> Bool
-- isTokenChar = inClass "!#$%&'()*+./0-9:<=>?@a-zA-Z[]^_`{|}~-\n"
src/Data/Gargantext/Parsers/XML.hs
0 → 100644
View file @
787cd46e
-- http://chrisdone.com/posts/fast-haskell-c-parsing-xml
stack.yaml
View file @
787cd46e
...
@@ -2,6 +2,6 @@ flags: {}
...
@@ -2,6 +2,6 @@ flags: {}
extra-package-dbs
:
[]
extra-package-dbs
:
[]
packages
:
packages
:
-
.
-
.
-
/home/alexandre/local/logiciels/haskell/servant/servant-multipart
extra-deps
:
extra-deps
:
-
servant-multipart-0.10.0.1
resolver
:
lts-9.2
resolver
:
lts-9.2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment