Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
bd47a5e3
Commit
bd47a5e3
authored
Jun 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLI] splitting executables into server and cli. Fixing parser.
parent
99d4f1f3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
67 additions
and
118 deletions
+67
-118
CleanCsvCorpus.hs
bin/gargantext-cli/CleanCsvCorpus.hs
+0
-0
Main.hs
bin/gargantext-server/Main.hs
+0
-0
foldFinal.hs
notes/foldFinal.hs
+0
-25
folds.hs
notes/folds.hs
+0
-56
package.yaml
package.yaml
+22
-2
Parsers.hs
src/Gargantext/Text/Parsers.hs
+45
-35
No files found.
app
/CleanCsvCorpus.hs
→
bin/gargantext-cli
/CleanCsvCorpus.hs
View file @
bd47a5e3
File moved
app
/Main.hs
→
bin/gargantext-server
/Main.hs
View file @
bd47a5e3
File moved
notes/foldFinal.hs
deleted
100644 → 0
View file @
99d4f1f3
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
import
Control.Lens
(
Getting
,
foldMapOf
)
-- | A fold consuming inputs of type @i@ and producing a result of type @o@.
--
-- The constructor hides an intermediate type @m@ (existentially quantified
-- and constrained to 'Monoid'). Its two fields are, in order:
--
--   * a function turning a single input into the hidden monoid;
--   * a function turning a monoid value into the final result.
data Fold i o
  = forall m. Monoid m
  => Fold (i -> m) (m -> o)
-- | Mapping over a 'Fold' only post-processes its result: the input-to-monoid
-- step is kept unchanged and the given function is applied after the
-- monoid-to-result step.
instance Functor (Fold i) where
  fmap f (Fold step done) = Fold step (\acc -> f (done acc))
-- | Two folds are combined by pairing their hidden monoids.
--
-- 'pure' ignores every input (tallying to @()@) and always yields the given
-- value. '<*>' feeds each input to both folds, keeps the two accumulators in
-- a tuple, and finally applies the function produced by the left fold to the
-- value produced by the right fold.
instance Applicative (Fold i) where
  pure result = Fold (\_ -> ()) (\_ -> result)

  Fold stepF doneF <*> Fold stepX doneX =
    Fold
      (\input -> (stepF input, stepX input))
      (\(accF, accX) -> doneF accF (doneX accX))
-- | Re-target a 'Fold' over @a@ so that it consumes @b@ instead.
--
-- The first argument is an optic usable as a 'Getting' for any 'Monoid'; the
-- tallying step of the fold is run through it with 'foldMapOf', while the
-- summarizing step is reused untouched.
focus :: (forall m. Monoid m => Getting m b a) -> Fold a o -> Fold b o
focus optic (Fold step done) = Fold (foldMapOf optic step) done
notes/folds.hs
deleted
100644 → 0
View file @
99d4f1f3
{-# LANGUAGE ExistentialQuantification #-}
-- | Thanks to Gabriel Gonzalez and his beautiful folds
import
Data.Monoid
import
Prelude
hiding
(
head
,
last
,
all
,
any
,
sum
,
product
,
length
)
import
qualified
Data.Foldable
-- | A fold from inputs @i@ to an output @o@, built from two functions that
-- communicate through a hidden (existentially quantified) 'Monoid' @m@:
-- the first converts one input into @m@, the second converts the combined
-- @m@ into the output.
data Fold i o
  = forall m. Monoid m
  => Fold (i -> m) (m -> o)
-- | Run a 'Fold' over a list.
--
-- Each element is converted with the fold's tallying function and combined
-- into the accumulator with '<>' by a strict left fold starting from
-- 'mempty'; the summarizing function is applied to the final accumulator.
fold :: Fold i o -> [i] -> o
fold (Fold step done) inputs =
  done (Data.Foldable.foldl' (\acc input -> acc <> step input) mempty inputs)
--
-- | Fold yielding the first input wrapped in 'Just'; the result comes from
-- 'getFirst' on the combined 'First' values.
head :: Fold a (Maybe a)
head = Fold (\x -> First (Just x)) getFirst
-- | Fold yielding the last input wrapped in 'Just'; the result comes from
-- 'getLast' on the combined 'Last' values.
last :: Fold a (Maybe a)
last = Fold (\x -> Last (Just x)) getLast
--
-- | Fold that applies the predicate to every input and combines the answers
-- through the 'All' monoid.
all :: (a -> Bool) -> Fold a Bool
all holds = Fold (\x -> All (holds x)) getAll
-- | Fold that applies the predicate to every input and combines the answers
-- through the 'Any' monoid.
any :: (a -> Bool) -> Fold a Bool
any holds = Fold (\x -> Any (holds x)) getAny
--
-- | Fold adding up all inputs via the 'Sum' monoid.
sum :: Num n => Fold n n
sum = Fold (\n -> Sum n) (\total -> getSum total)
-- | Fold multiplying all inputs via the 'Product' monoid.
product :: Num n => Fold n n
product = Fold (\n -> Product n) (\total -> getProduct total)
-- | Fold counting its inputs: every input, whatever its value, contributes
-- @Sum 1@ and the count is read back with 'getSum'.
length :: Num n => Fold i n
length = Fold countOne getSum
  where
    countOne _ = Sum 1
--
{-# LANGUAGE BangPatterns #-}
-- | Accumulator for a mean, stored as a pair of strict fields.
data Average a = Average
  { numerator   :: !a    -- running total
  , denominator :: !Int  -- number of values counted
  }
-- | Two averages are combined by adding their numerators and adding their
-- denominators.
--
-- This instance is required because 'Semigroup' is a superclass of 'Monoid'
-- (base >= 4.11 / GHC >= 8.4); without it the 'Monoid' instance below is
-- rejected by the compiler.
instance Num a => Semigroup (Average a) where
  Average xL nL <> Average xR nR = Average (xL + xR) (nL + nR)

-- | The neutral element has a zero numerator and a zero denominator.
-- 'mappend' is kept for older call sites and simply delegates to '<>'.
instance Num a => Monoid (Average a) where
  mempty  = Average 0 0
  mappend = (<>)
-- | Fold computing the arithmetic mean of its inputs.
--
-- Every input contributes itself to the numerator and 1 to the denominator;
-- the result is the numerator divided by the denominator.
-- NOTE(review): the division is not guarded against a zero denominator.
average :: Fractional a => Fold a a
average = Fold single finish
  where
    -- One observation: the value itself, counted once.
    single value = Average value 1
    -- Local names differ from the record fields to avoid shadowing them.
    finish (Average total count) = total / fromIntegral count
package.yaml
View file @
bd47a5e3
...
...
@@ -124,9 +124,29 @@ library:
# - utc
executables
:
gargantext
:
gargantext
-server
:
main
:
Main.hs
source-dirs
:
app
source-dirs
:
bin/gargantext-server
ghc-options
:
-
-threaded
-
-rtsopts
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
dependencies
:
-
base
-
containers
-
gargantext
-
vector
-
cassava
-
ini
-
optparse-generic
-
unordered-containers
-
full-text-search
gargantext-cli
:
main
:
Main.hs
source-dirs
:
bin/gargantext-cli
ghc-options
:
-
-threaded
-
-rtsopts
...
...
src/Gargantext/Text/Parsers.hs
View file @
bd47a5e3
...
...
@@ -23,20 +23,31 @@ please follow the types.
module
Gargantext.Text.Parsers
-- (parse, FileFormat(..))
where
import
Gargantext.Prelude
import
System.FilePath
(
FilePath
(),
takeExtension
)
import
Codec.Archive.Zip
(
withArchive
,
getEntry
,
getEntries
)
import
System.FilePath
(
FilePath
())
import
Data.Either.Extra
(
partitionEithers
)
import
Data.List
(
concat
)
import
qualified
Data.Map
as
DM
import
qualified
Data.ByteString
as
DB
import
Data.Ord
()
import
Data.String
()
import
Data.Either
(
Either
(
..
))
import
Data.Attoparsec.ByteString
(
parseOnly
,
Parser
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
DT
-- | Activate Async to parse in parallel
--
import Control.Concurrent.Async as CCA (mapConcurrently)
import
Control.Concurrent.Async
as
CCA
(
mapConcurrently
)
import
Data.Text.Encoding
(
decodeUtf8
)
import
Data.String
(
String
())
------------------------------------------------------------------------
import
Gargantext.Prelude
import
Gargantext.Text.Parsers.WOS
(
wosParser
)
------------------------------------------------------------------------
-- | Parse failures are reported as plain 'String' messages.
type ParseError = String
-- | Alias for 'Text' used to name a field.
type Field = Text
...
...
@@ -60,38 +71,37 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message
--parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
--parse format path = do
-- files <- case takeExtension path of
-- ".zip" -> openZip path
-- _ -> pure <$> DB.readFile path
-- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
-- pure (as, map toText $ concat bs)
-- where
-- -- TODO : decode with bayesian inference on encodings
-- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
---- | withParser:
---- According the format of the text, choosing the right parser.
---- TODO withParser :: FileFormat -> Parser [Document]
--withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
--withParser WOS = wosParser
----withParser DOC = docParser
----withParser ODT = odtParser
----withParser XML = xmlParser
----withParser _ = error "[ERROR] Parser not implemented yet"
--
--runParser :: FileFormat -> DB.ByteString
-- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
--runParser format text = pure $ parseOnly (withParser format) text
--
--openZip :: FilePath -> IO [DB.ByteString]
--openZip fp = do
-- path <- resolveFile' fp
-- entries <- withArchive path (DM.keys <$> getEntries)
-- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
-- pure bs
-- | Parse the file at the given path according to the given format.
--
-- A path whose extension is @.zip@ is opened with 'openZip' and each entry is
-- parsed separately; any other path is read as a single input. All inputs are
-- parsed concurrently. The result pairs the list of parse errors with the
-- concatenation of all successfully parsed documents, whose keys and values
-- are decoded with 'decodeUtf8'.
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
parse format path = do
    inputs <- case takeExtension path of
      ".zip" -> openZip path
      _      -> pure <$> DB.readFile path
    outcomes <- mapConcurrently (runParser format) inputs
    let (failures, documents) = partitionEithers outcomes
    pure (failures, map decodeDocument (concat documents))
  where
    -- TODO : decode with bayesian inference on encodings
    -- NOTE(review): decodeUtf8 is applied unconditionally; confirm inputs
    -- are always valid UTF-8.
    decodeDocument fields =
      [ (decodeUtf8 key, decodeUtf8 value) | (key, value) <- fields ]
-- | withParser:
-- Choose the parser that matches the format of the text.
-- Only 'WOS' has an equation here; the alternatives below are still
-- commented out.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser
--withParser DOC = docParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet"
-- | Run the parser selected by 'withParser' for the given format over one
-- 'DB.ByteString' with 'parseOnly', and return the outcome in 'IO': 'Left'
-- with an error message or 'Right' with the parsed documents.
runParser :: FileFormat
          -> DB.ByteString
          -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure (parseOnly (withParser format) text)
-- | Read the contents of every entry of the zip archive at the given path.
--
-- The archive is first opened to list the keys of its entries; each entry is
-- then fetched concurrently, re-opening the archive once per entry.
openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do
    selectors <- withArchive fp (DM.keys <$> getEntries)
    mapConcurrently (\selector -> withArchive fp (getEntry selector)) selectors
clean
::
Text
->
Text
clean
txt
=
DT
.
map
clean'
txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment