Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
787cd46e
Commit
787cd46e
authored
Oct 31, 2017
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Adding WOS parser, NLP functions, Rights management guidelines (draft).
parent
a72e061f
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
277 additions
and
7 deletions
+277
-7
gargantext.cabal
gargantext.cabal
+9
-1
foldFinal.hs
notes/foldFinal.hs
+25
-0
folds.hs
notes/folds.hs
+56
-0
Analysis.hs
src/Data/Gargantext/Analysis.hs
+15
-0
API.hs
src/Data/Gargantext/Internal/API.hs
+9
-0
CoreNLP.hs
src/Data/Gargantext/NLP/CoreNLP.hs
+2
-2
Network.hs
src/Data/Gargantext/Network.hs
+36
-0
Occurrences.hs
src/Data/Gargantext/Ngrams/Occurrences.hs
+1
-1
Parsers.hs
src/Data/Gargantext/Parsers.hs
+2
-2
CSV.hs
src/Data/Gargantext/Parsers/CSV.hs
+0
-0
RIS.hs
src/Data/Gargantext/Parsers/RIS.hs
+0
-0
Utils.hs
src/Data/Gargantext/Parsers/Utils.hs
+8
-0
WOS.hs
src/Data/Gargantext/Parsers/WOS.hs
+112
-0
XML.hs
src/Data/Gargantext/Parsers/XML.hs
+1
-0
stack.yaml
stack.yaml
+1
-1
No files found.
gargantext.cabal
View file @
787cd46e
...
...
@@ -18,17 +18,21 @@ library
build-depends: base >= 4.7 && < 5
, aeson
, attoparsec
, async
, base16-bytestring
, bytestring
, case-insensitive
, containers
, contravariant
, conduit
, conduit-extra
, directory
, extra
, filepath
, http-conduit
, lens
, opaleye
, path
, postgresql-simple
, pretty
, product-profunctors
...
...
@@ -53,11 +57,15 @@ library
, warp
, yaml
, zlib
, zip
, path-io
--, LibZip
-- , stemmer
--, utc
exposed-modules: Data.Gargantext
, Data.Gargantext.Analysis
, Data.Gargantext.DSL
, Data.Gargantext.Ngrams.Occurrences
, Data.Gargantext.NLP
, Data.Gargantext.NLP.CoreNLP
, Data.Gargantext.Database
...
...
@@ -71,7 +79,7 @@ library
, Data.Gargantext.Database.Private
, Data.Gargantext.Database.User
, Data.Gargantext.Parsers
, Data.Gargantext.Parsers.
Occurrences
, Data.Gargantext.Parsers.
WOS
, Data.Gargantext.Prelude
, Data.Gargantext.Server
, Data.Gargantext.Types
...
...
notes/foldFinal.hs
0 → 100644
View file @
787cd46e
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
import
Control.Lens
(
Getting
,
foldMapOf
)
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
instance
Functor
(
Fold
i
)
where
fmap
k
(
Fold
tally
summarize
)
=
Fold
tally
(
k
.
summarize
)
instance
Applicative
(
Fold
i
)
where
pure
o
=
Fold
(
\
_
->
()
)
(
\
_
->
o
)
Fold
tallyF
summarizeF
<*>
Fold
tallyX
summarizeX
=
Fold
tally
summarize
where
tally
i
=
(
tallyF
i
,
tallyX
i
)
summarize
(
nF
,
nX
)
=
summarizeF
nF
(
summarizeX
nX
)
focus
::
(
forall
m
.
Monoid
m
=>
Getting
m
b
a
)
->
Fold
a
o
->
Fold
b
o
focus
lens
(
Fold
tally
summarize
)
=
Fold
(
foldMapOf
lens
tally
)
summarize
notes/folds.hs
0 → 100644
View file @
787cd46e
{-# LANGUAGE ExistentialQuantification #-}
-- | Thanks to Gabriel Gonzales and his beautiful folds
import
Data.Monoid
import
Prelude
hiding
(
head
,
last
,
all
,
any
,
sum
,
product
,
length
)
import
qualified
Data.Foldable
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
fold
::
Fold
i
o
->
[
i
]
->
o
fold
(
Fold
tally
summarize
)
is
=
summarize
(
reduce
(
map
tally
is
))
where
reduce
=
Data
.
Foldable
.
foldl'
(
<>
)
mempty
--
head
::
Fold
a
(
Maybe
a
)
head
=
Fold
(
First
.
Just
)
getFirst
last
::
Fold
a
(
Maybe
a
)
last
=
Fold
(
Last
.
Just
)
getLast
--
all
::
(
a
->
Bool
)
->
Fold
a
Bool
all
predicate
=
Fold
(
All
.
predicate
)
getAll
any
::
(
a
->
Bool
)
->
Fold
a
Bool
any
predicate
=
Fold
(
Any
.
predicate
)
getAny
--
sum
::
Num
n
=>
Fold
n
n
sum
=
Fold
Sum
getSum
product
::
Num
n
=>
Fold
n
n
product
=
Fold
Product
getProduct
length
::
Num
n
=>
Fold
i
n
length
=
Fold
(
\
_
->
Sum
1
)
getSum
--
{-# LANGUAGE BangPatterns #-}
data
Average
a
=
Average
{
numerator
::
!
a
,
denominator
::
!
Int
}
instance
Num
a
=>
Monoid
(
Average
a
)
where
mempty
=
Average
0
0
mappend
(
Average
xL
nL
)
(
Average
xR
nR
)
=
Average
(
xL
+
xR
)
(
nL
+
nR
)
average
::
Fractional
a
=>
Fold
a
a
average
=
Fold
tally
summarize
where
tally
x
=
Average
x
1
summarize
(
Average
numerator
denominator
)
=
numerator
/
fromIntegral
denominator
src/Data/Gargantext/Analysis.hs
View file @
787cd46e
...
...
@@ -3,6 +3,8 @@ module Data.Gargantext.Analysis where
-- import qualified Data.Text.Lazy as DTL
import
Data.Text
import
Opaleye
(
Column
,
PGInt4
)
--import Data.Map as DM
--import Data.Vector as DV
-- | Simple function to count Occurrences in a context of text.
occOfDocument
::
Column
PGInt4
->
Text
->
IO
Int
...
...
@@ -15,3 +17,16 @@ occOfDocument = undefined
-- Left str -> error $ "[ERRROR] at file/function/line" ++ str
-- Right xs -> xs
-- pure (sum result)
data
Occurrences
a
b
=
Map
a
b
src/Data/Gargantext/Internal/API.hs
0 → 100644
View file @
787cd46e
garg
garg
CorpusWith
makeAListOf
Corpus
ListType
src/Data/Gargantext/NLP/CoreNLP.hs
View file @
787cd46e
...
...
@@ -52,7 +52,7 @@ instance FromJSON Sentences
corenlpPretty
::
String
->
IO
()
corenlpPretty
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
...
...
@@ -63,7 +63,7 @@ corenlpPretty txt = do
corenlp
::
String
->
IO
Sentences
corenlp
txt
=
do
let
url
=
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
url
<-
parseRequest
"POST http://localhost:9000/?properties={
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
let
request
=
setRequestBodyJSON
txt
url
response
<-
httpJSON
request
pure
(
getResponseBody
response
::
Sentences
)
...
...
src/Data/Gargantext/Network.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Network
where
import
Data.Gargantext.Prelude
import
Data.Map
as
DM
import
Data.Vector
as
DV
type
Measure
a
b
c
=
DM
.
Map
a
(
DM
.
Map
b
c
)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type
GargVector
a
b
c
=
DM
.
Map
a
(
DM
.
Map
b
c
)
-- GargMatrix Granularity (Paire Paire) [Candle]
type
GargMatrix
a
b
c
d
=
DM
.
Map
a
(
FolioVector
b
c
d
)
-- GargMatrix Granularity (Paire Paire) [Candle]
type
GargTensor
a
b
c
d
e
=
DM
.
Map
a
(
FolioMatrix
b
c
d
e
)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure
::
Granularity
->
Paire
->
[
Candle
]
->
Measure
Granularity
Paire
Candle
toMeasure
g
c1
c2
cs
=
DM
.
fromList
[(
g
,
src/Data/Gargantext/
Parser
s/Occurrences.hs
→
src/Data/Gargantext/
Ngram
s/Occurrences.hs
View file @
787cd46e
{-# LANGUAGE OverloadedStrings #-}
module
Data.Gargantext.
Parser
s.Occurrences
where
module
Data.Gargantext.
Ngram
s.Occurrences
where
import
Data.Attoparsec.Text
import
Data.Text
(
Text
)
...
...
src/Data/Gargantext/Parsers.hs
View file @
787cd46e
module
Data.Gargantext.Parsers
(
module
Data
.
Gargantext
.
Parsers
.
Occurrences
)
module
Data.Gargantext.Parsers
(
module
Data
.
Gargantext
.
Parsers
.
WOS
)
where
import
Data.Gargantext.Parsers.
Occurrences
import
Data.Gargantext.Parsers.
WOS
src/Data/Gargantext/Parsers/CSV.hs
0 → 100644
View file @
787cd46e
src/Data/Gargantext/Parsers/RIS.hs
0 → 100644
View file @
787cd46e
src/Data/Gargantext/Parsers/Utils.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Parsers.Utils
where
-- use Duckling here
parseDate
=
undefined
src/Data/Gargantext/Parsers/WOS.hs
0 → 100644
View file @
787cd46e
module
Data.Gargantext.Parsers.WOS
where
import
Prelude
hiding
(
takeWhile
,
take
,
concat
,
readFile
)
import
qualified
Data.List
as
DL
import
Data.Map
as
DM
import
Data.Attoparsec.ByteString
import
Data.Attoparsec.ByteString.Char8
(
anyChar
,
char8
,
endOfLine
,
isDigit_w8
,
isAlpha_ascii
,
isEndOfLine
)
import
Data.ByteString
(
ByteString
,
unpack
,
pack
,
concat
,
readFile
)
import
Data.Either.Extra
(
Either
(
..
))
import
Control.Applicative
import
Control.Monad
(
join
)
-- To be removed just for Tests
--
-- import Codec.Archive.LibZip (withArchive, fileNames, sourceFile, addFile)
--import Codec.Archive.LibZip.Types (ZipSource, OpenFlag (CreateFlag))
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
System.Environment
import
Codec.Archive.Zip
import
Path
(
parseAbsFile
)
import
Path.IO
(
resolveFile'
)
-- import qualified Data.ByteString.Lazy as B
import
Control.Applicative
(
(
<$>
)
)
zipFiles
::
FilePath
->
IO
[
ByteString
]
zipFiles
fp
=
do
path
<-
resolveFile'
fp
entries
<-
withArchive
path
(
DM
.
keys
<$>
getEntries
)
bs
<-
mapConcurrently
(
\
s
->
withArchive
path
(
getEntry
s
))
entries
pure
bs
parseFile
::
ParserType
->
ByteString
->
IO
Int
parseFile
p
x
=
case
runParser
p
x
of
Left
e
->
pure
1
Right
r
->
pure
$
length
r
testWos
::
FilePath
->
IO
[
Int
]
testWos
fp
=
join
$
mapConcurrently
(
parseFile
WOS
)
<$>
zipFiles
fp
-- type Parser a = a -> Text -> [Document]
data
ParserType
=
WOS
|
CSV
wosParser
::
Parser
[
Maybe
[
ByteString
]]
wosParser
=
do
-- TODO Warning if version /= 1.0
_
<-
manyTill
anyChar
(
string
"
\n
VR 1.0"
)
ns
<-
many1
wosNotice
<*
"
\n
EF"
return
ns
startNotice
::
Parser
ByteString
startNotice
=
"
\n
PT "
*>
takeTill
isEndOfLine
wosNotice
::
Parser
(
Maybe
[
ByteString
])
wosNotice
=
do
n
<-
startNotice
*>
wosFields
<*
manyTill
anyChar
(
string
"
\n
ER
\n
"
)
return
n
field'
::
Parser
(
ByteString
,
[
ByteString
])
field'
=
do
f
<-
"
\n
"
*>
take
2
<*
" "
a
<-
takeTill
isEndOfLine
as
<-
try
wosLines
let
as'
=
case
DL
.
length
as
>
0
of
True
->
as
False
->
[]
return
(
f
,
[
a
]
++
as'
)
wosFields'
::
Parser
[(
ByteString
,
[
ByteString
])]
wosFields'
=
many
field'
wosFields
::
Parser
(
Maybe
[
ByteString
])
wosFields
=
do
-- a <- field "AU"
-- t <- field "TI"
-- s <- field "SO"
-- d <- field "DI" -- DOI
-- p <- field "PD"
-- b <- field "AB"
-- u <- field "UT"
ws
<-
many
field'
return
$
DL
.
lookup
"UT"
ws
-- return $ HyperdataDocument
-- Just "WOS"
-- DL.lookup "DI" ws
-- DL.lookup "URL" ws
-- DL.lookup "PA" ws
-- DL.lookup "TI" ws
--
wosLines
::
Parser
[
ByteString
]
wosLines
=
many
line
where
line
::
Parser
ByteString
line
=
"
\n
"
*>
takeTill
isEndOfLine
runParser
::
ParserType
->
ByteString
->
Either
String
[
Maybe
[
ByteString
]]
runParser
p
x
=
parseOnly
parser
x
where
parser
=
case
p
of
WOS
->
wosParser
_
->
error
"Not implemented yet"
-- isTokenChar :: Word8 -> Bool
-- isTokenChar = inClass "!#$%&'()*+./0-9:<=>?@a-zA-Z[]^_`{|}~-\n"
src/Data/Gargantext/Parsers/XML.hs
0 → 100644
View file @
787cd46e
-- http://chrisdone.com/posts/fast-haskell-c-parsing-xml
stack.yaml
View file @
787cd46e
...
...
@@ -2,6 +2,6 @@ flags: {}
extra-package-dbs
:
[]
packages
:
-
.
-
/home/alexandre/local/logiciels/haskell/servant/servant-multipart
extra-deps
:
-
servant-multipart-0.10.0.1
resolver
:
lts-9.2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment