Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
bd47a5e3
Commit
bd47a5e3
authored
Jun 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLI] spliting executables into server and cli. Fixing parser.
parent
99d4f1f3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
67 additions
and
118 deletions
+67
-118
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+0
-0
Main.hs
bin/gargantext-server/Main.hs
+0
-0
foldFinal.hs
notes/foldFinal.hs
+0
-25
folds.hs
notes/folds.hs
+0
-56
package.yaml
package.yaml
+22
-2
Parsers.hs
src/Gargantext/Text/Parsers.hs
+45
-35
No files found.
app
/CleanCsvCorpus.hs
→
bin/gargantext-cli
/CleanCsvCorpus.hs
View file @
bd47a5e3
File moved
app
/Main.hs
→
bin/gargantext-server
/Main.hs
View file @
bd47a5e3
File moved
notes/foldFinal.hs
deleted
100644 → 0
View file @
99d4f1f3
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
import
Control.Lens
(
Getting
,
foldMapOf
)
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
instance
Functor
(
Fold
i
)
where
fmap
k
(
Fold
tally
summarize
)
=
Fold
tally
(
k
.
summarize
)
instance
Applicative
(
Fold
i
)
where
pure
o
=
Fold
(
\
_
->
()
)
(
\
_
->
o
)
Fold
tallyF
summarizeF
<*>
Fold
tallyX
summarizeX
=
Fold
tally
summarize
where
tally
i
=
(
tallyF
i
,
tallyX
i
)
summarize
(
nF
,
nX
)
=
summarizeF
nF
(
summarizeX
nX
)
focus
::
(
forall
m
.
Monoid
m
=>
Getting
m
b
a
)
->
Fold
a
o
->
Fold
b
o
focus
lens
(
Fold
tally
summarize
)
=
Fold
(
foldMapOf
lens
tally
)
summarize
notes/folds.hs
deleted
100644 → 0
View file @
99d4f1f3
{-# LANGUAGE ExistentialQuantification #-}
-- | Thanks to Gabriel Gonzales and his beautiful folds
import
Data.Monoid
import
Prelude
hiding
(
head
,
last
,
all
,
any
,
sum
,
product
,
length
)
import
qualified
Data.Foldable
data
Fold
i
o
=
forall
m
.
Monoid
m
=>
Fold
(
i
->
m
)
(
m
->
o
)
fold
::
Fold
i
o
->
[
i
]
->
o
fold
(
Fold
tally
summarize
)
is
=
summarize
(
reduce
(
map
tally
is
))
where
reduce
=
Data
.
Foldable
.
foldl'
(
<>
)
mempty
--
head
::
Fold
a
(
Maybe
a
)
head
=
Fold
(
First
.
Just
)
getFirst
last
::
Fold
a
(
Maybe
a
)
last
=
Fold
(
Last
.
Just
)
getLast
--
all
::
(
a
->
Bool
)
->
Fold
a
Bool
all
predicate
=
Fold
(
All
.
predicate
)
getAll
any
::
(
a
->
Bool
)
->
Fold
a
Bool
any
predicate
=
Fold
(
Any
.
predicate
)
getAny
--
sum
::
Num
n
=>
Fold
n
n
sum
=
Fold
Sum
getSum
product
::
Num
n
=>
Fold
n
n
product
=
Fold
Product
getProduct
length
::
Num
n
=>
Fold
i
n
length
=
Fold
(
\
_
->
Sum
1
)
getSum
--
{-# LANGUAGE BangPatterns #-}
data
Average
a
=
Average
{
numerator
::
!
a
,
denominator
::
!
Int
}
instance
Num
a
=>
Monoid
(
Average
a
)
where
mempty
=
Average
0
0
mappend
(
Average
xL
nL
)
(
Average
xR
nR
)
=
Average
(
xL
+
xR
)
(
nL
+
nR
)
average
::
Fractional
a
=>
Fold
a
a
average
=
Fold
tally
summarize
where
tally
x
=
Average
x
1
summarize
(
Average
numerator
denominator
)
=
numerator
/
fromIntegral
denominator
package.yaml
View file @
bd47a5e3
...
...
@@ -124,9 +124,29 @@ library:
# - utc
executables
:
gargantext
:
gargantext
-server
:
main
:
Main.hs
source-dirs
:
app
source-dirs
:
bin/gargantext-server
ghc-options
:
-
-threaded
-
-rtsopts
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
dependencies
:
-
base
-
containers
-
gargantext
-
vector
-
cassava
-
ini
-
optparse-generic
-
unordered-containers
-
full-text-search
gargantext-cli
:
main
:
Main.hs
source-dirs
:
bin/gargantext-cli
ghc-options
:
-
-threaded
-
-rtsopts
...
...
src/Gargantext/Text/Parsers.hs
View file @
bd47a5e3
...
...
@@ -23,20 +23,31 @@ please follow the types.
module
Gargantext.Text.Parsers
-- (parse, FileFormat(..))
where
import
Gargantext.Prelude
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
System.FilePath
(
FilePath
())
import
Data.Either.Extra
(
partitionEithers
)
import
Data.List
(
concat
)
import
qualified
Data.Map
as
DM
import
qualified
Data.ByteString
as
DB
import
Data.Ord
()
import
Data.String
()
import
Data.Either
(
Either
(
..
))
import
Data.Attoparsec.ByteString
(
parseOnly
,
Parser
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
DT
-- | Activate Async for to parse in parallel
--
import Control.Concurrent.Async as CCA (mapConcurrently)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.String
(
String
())
------------------------------------------------------------------------
import
Gargantext.Prelude
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
------------------------------------------------------------------------
type
ParseError
=
String
type
Field
=
Text
...
...
@@ -60,38 +71,37 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message
--parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
--parse format path = do
-- files <- case takeExtension path of
-- ".zip" -> openZip path
-- _ -> pure <$> DB.readFile path
-- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
-- pure (as, map toText $ concat bs)
-- where
-- -- TODO : decode with bayesian inference on encodings
-- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
---- | withParser:
---- According the format of the text, choosing the right parser.
---- TODO withParser :: FileFormat -> Parser [Document]
--withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
--withParser WOS = wosParser
----withParser DOC = docParser
----withParser ODT = odtParser
----withParser XML = xmlParser
----withParser _ = error "[ERROR] Parser not implemented yet"
--
--runParser :: FileFormat -> DB.ByteString
-- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
--runParser format text = pure $ parseOnly (withParser format) text
--
--openZip :: FilePath -> IO [DB.ByteString]
--openZip fp = do
-- path <- resolveFile' fp
-- entries <- withArchive path (DM.keys <$> getEntries)
-- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
-- pure bs
parse
::
FileFormat
->
FilePath
->
IO
([
ParseError
],
[[(
Text
,
Text
)]])
parse
format
path
=
do
files
<-
case
takeExtension
path
of
".zip"
->
openZip
path
_
->
pure
<$>
DB
.
readFile
path
(
as
,
bs
)
<-
partitionEithers
<$>
mapConcurrently
(
runParser
format
)
files
pure
(
as
,
map
toText
$
concat
bs
)
where
-- TODO : decode with bayesian inference on encodings
toText
=
map
(
\
(
a
,
b
)
->
(
decodeUtf8
a
,
decodeUtf8
b
))
-- | withParser:
-- According the format of the text, choosing the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser
::
FileFormat
->
Parser
[[(
DB
.
ByteString
,
DB
.
ByteString
)]]
withParser
WOS
=
wosParser
--withParser DOC = docParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet"
runParser
::
FileFormat
->
DB
.
ByteString
->
IO
(
Either
String
[[(
DB
.
ByteString
,
DB
.
ByteString
)]])
runParser
format
text
=
pure
$
parseOnly
(
withParser
format
)
text
openZip
::
FilePath
->
IO
[
DB
.
ByteString
]
openZip
fp
=
do
entries
<-
withArchive
fp
(
DM
.
keys
<$>
getEntries
)
bs
<-
mapConcurrently
(
\
s
->
withArchive
fp
(
getEntry
s
))
entries
pure
bs
clean
::
Text
->
Text
clean
txt
=
DT
.
map
clean'
txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment