Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
dc1e008d
Commit
dc1e008d
authored
Jul 22, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Learn function to export models
parent
61e8d712
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
98 additions
and
42 deletions
+98
-42
Flow.hs
src/Gargantext/Database/Flow.hs
+2
-1
Learn.hs
src/Gargantext/Database/Learn.hs
+67
-25
Prelude.hs
src/Gargantext/Prelude.hs
+3
-0
Utils.hs
src/Gargantext/Prelude/Utils.hs
+2
-2
GrandDebat.hs
src/Gargantext/Text/Corpus/Parsers/GrandDebat.hs
+7
-12
Learn.hs
src/Gargantext/Text/Learn.hs
+17
-2
No files found.
src/Gargantext/Database/Flow.hs
View file @
dc1e008d
...
...
@@ -71,6 +71,7 @@ import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import
qualified
Gargantext.Text.Corpus.API.Isidore
as
Isidore
import
Gargantext.Text.Terms
(
TermType
(
..
),
tt_lang
,
extractTerms
,
uniText
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Prelude.Utils
hiding
(
hash
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
...
...
@@ -124,7 +125,7 @@ flowCorpusDebat :: FlowCmdM env err m
flowCorpusDebat
u
n
l
fp
=
do
docs
<-
liftIO
(
splitEvery
500
<$>
take
l
<$>
GD
.
readFile
fp
<$>
readFile'
fp
::
IO
[[
GD
.
GrandDebatReference
]]
)
flowCorpus
u
n
(
Multi
FR
)
(
map
(
map
toHyperdataDocument
)
docs
)
...
...
src/Gargantext/Database/Learn.hs
View file @
dc1e008d
...
...
@@ -13,6 +13,8 @@ Portability : POSIX
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module
Gargantext.Database.Learn
where
...
...
@@ -27,42 +29,82 @@ import qualified Data.List as List
import
qualified
Data.Text
as
Text
import
Gargantext.Database.Schema.NodeNode
(
nodesToFavorite
)
import
Gargantext.API.Node
(
delDocs
,
Documents
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Database.Schema.Node
(
HasNodeError
)
--import Gargantext.Database.Utils (Cmd)
--import Gargantext.Database.Schema.Node (HasNodeError)
import
Gargantext.API
import
Gargantext.API.Settings
import
Gargantext.Database.Flow
(
FlowCmdM
)
text
::
FacetDoc
->
(
NodeId
,
Text
)
text
(
FacetDoc
nId
_
_
h
_
_
)
=
(
nId
,
title
<>
""
<>
Text
.
take
100
abstr
)
where
title
=
maybe
""
identity
(
_hyperdataDocument_title
h
)
abstr
=
maybe
""
identity
(
_hyperdataDocument_abstract
h
)
--moreLike docs_fav docs_trash docs_test = do
data
FavTrash
=
IsFav
|
IsTrash
data
FavOrTrash
=
IsFav
|
IsTrash
deriving
(
Eq
)
moreLike
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
Cmd
err
[(
NodeId
,
Maybe
Bool
)]
--moreLike :: FlowCmdM env error m => FavOrTrash -> CorpusId -> m (Events Bool, [FacetDoc])
moreLike
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
[
FacetDoc
]
moreLike
ft
cId
=
do
let
b
=
if
(
==
)
ft
IsFav
then
True
else
False
priors
<-
getPriors
ft
cId
moreLikeWith
priors
ft
cId
---------------------------------------------------------------------------
getPriors
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
(
Events
Bool
)
getPriors
ft
cId
=
do
docs_trash
<-
runViewDocuments
cId
True
Nothing
Nothing
Nothing
docs_trash
<-
map
text
<$>
runViewDocuments
cId
True
Nothing
Nothing
Nothing
docs_fav
<-
map
text
<$>
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
True
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
docs_test
<-
map
text
<$>
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
False
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
docs_fav
<-
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
True
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
let
priors
=
priorEventsWith
snd
b
(
List
.
zip
(
repeat
False
)
docs_fav
<>
List
.
zip
(
repeat
True
)
docs_trash
)
let
priors
=
priorEventsWith
text
(
fav2bool
ft
)
(
List
.
zip
(
repeat
False
)
docs_fav
<>
List
.
zip
(
repeat
True
)
docs_trash
)
pure
priors
moreLikeWith
::
FlowCmdM
DevEnv
GargError
m
=>
Events
Bool
->
FavOrTrash
->
CorpusId
->
m
[
FacetDoc
]
moreLikeWith
priors
ft
cId
=
do
pure
$
filter
((
==
)
(
Just
$
not
b
)
.
snd
)
$
map
(
\
x
->
(
fst
x
,
detectDefaultWithPriors
snd
priors
x
))
docs_test
docs_test
<-
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
False
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
learnModify
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
[
NodeId
]
->
Cmd
err
[
Int
]
learnModify
favTrash
cId
ns
=
case
favTrash
of
let
results
=
map
fst
$
filter
((
==
)
(
Just
$
not
$
fav2bool
ft
)
.
snd
)
$
map
(
\
f
->
(
f
,
detectDefaultWithPriors
text
priors
f
))
docs_test
pure
results
---------------------------------------------------------------------------
fav2bool
::
FavOrTrash
->
Bool
fav2bool
ft
=
if
(
==
)
ft
IsFav
then
True
else
False
text
::
FacetDoc
->
Text
text
(
FacetDoc
_
_
_
h
_
_
)
=
title
<>
""
<>
Text
.
take
100
abstr
where
title
=
maybe
""
identity
(
_hyperdataDocument_title
h
)
abstr
=
maybe
""
identity
(
_hyperdataDocument_abstract
h
)
---------------------------------------------------------------------------
apply
::
(
FlowCmdM
DevEnv
GargError
m
)
=>
FavOrTrash
->
CorpusId
->
[
NodeId
]
->
m
[
Int
]
apply
favTrash
cId
ns
=
case
favTrash
of
IsFav
->
nodesToFavorite
$
map
(
\
n
->
(
cId
,
n
,
True
))
ns
IsTrash
->
delDocs
cId
(
Documents
ns
)
learnAndApply
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
Cmd
err
[
Int
]
learnAndApply
ft
cId
=
do
ids
<-
map
fst
<$>
moreLike
ft
cId
learnModify
ft
cId
ids
moreLikeAndApply
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
[
Int
]
moreLikeAndApply
ft
cId
=
do
priors
<-
getPriors
ft
cId
moreLikeWithAndApply
priors
ft
cId
moreLikeWithAndApply
::
FlowCmdM
DevEnv
GargError
m
=>
Events
Bool
->
FavOrTrash
->
CorpusId
->
m
[
Int
]
moreLikeWithAndApply
priors
ft
cId
=
do
ids
<-
map
facetDoc_id
<$>
moreLikeWith
priors
ft
cId
apply
ft
cId
ids
src/Gargantext/Prelude.hs
View file @
dc1e008d
...
...
@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay
init'
::
Text
->
[
a
]
->
[
a
]
init'
=
listSafeN
"init"
initMay
------------------------------------------------------------------------
src/Gargantext/Prelude/Utils.hs
View file @
dc1e008d
...
...
@@ -54,9 +54,9 @@ class ReadFile a where
readFile'
::
FilePath
->
IO
a
sav
eFile
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
,
SaveFile
a
)
writ
eFile
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
,
SaveFile
a
)
=>
a
->
m
FilePath
sav
eFile
a
=
do
writ
eFile
a
=
do
dataPath
<-
view
(
settings
.
fileFolder
)
<$>
ask
(
fp
,
fn
)
<-
liftIO
$
(
toPath
3
)
.
hash
.
Text
.
pack
.
show
<$>
newStdGen
...
...
src/Gargantext/Text/Corpus/Parsers/GrandDebat.hs
View file @
dc1e008d
...
...
@@ -19,18 +19,17 @@ TODO: create a separate Lib.
module
Gargantext.Text.Corpus.Parsers.GrandDebat
where
import
GHC.IO
(
FilePath
)
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
qualified
Data.JsonStream.Parser
as
P
--import Data.Either (either)
import
Data.Maybe
(
Maybe
())
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
Text
import
qualified
Data.ByteString.Lazy
as
DBL
import
GHC.Generics
(
Generic
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude.Utils
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.JsonStream.Parser
as
P
import
qualified
Data.Text
as
Text
data
GrandDebatReference
=
GrandDebatReference
...
...
@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference
True
->
r'
False
->
""
class
ReadFile
a
where
readFile
::
FilePath
->
IO
a
instance
ReadFile
[
GrandDebatReference
]
where
-- | read json: 3 version below are working but with increased optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
readFile
'
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
src/Gargantext/Text/Learn.hs
View file @
dc1e008d
...
...
@@ -18,24 +18,30 @@ Main type here is String.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module
Gargantext.Text.Learn
-- (detectLang, detectLangs, stopList)
where
--import Data.Char (toLower)
import
Codec.Serialise
import
qualified
Data.List
as
DL
import
Data.Maybe
(
maybe
)
import
Data.Map.Strict
(
Map
,
toList
)
import
qualified
Data.Map.Strict
as
DM
import
GHC.Generics
import
Data.String
(
String
)
import
Data.Text
(
Text
)
import
Data.Text
(
pack
,
unpack
,
toLower
)
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.ByteString.Lazy
as
BSL
import
Gargantext.Prelude
import
Gargantext.Prelude.Utils
import
Gargantext.Core
(
Lang
(
..
),
allLangs
)
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
...
...
@@ -172,7 +178,16 @@ toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
data
EventBook
=
EventBook
{
events_freq
::
Map
String
Freq
,
events_n
::
Map
StringSize
TotalFreq
}
deriving
(
Show
)
deriving
(
Show
,
Generic
)
instance
Serialise
EventBook
instance
(
Serialise
a
,
Ord
a
)
=>
SaveFile
(
Events
a
)
where
saveFile'
f
d
=
BSL
.
writeFile
f
(
serialise
d
)
instance
(
Serialise
a
,
Ord
a
)
=>
ReadFile
(
Events
a
)
where
readFile'
filepath
=
deserialise
<$>
BSL
.
readFile
filepath
emptyEventBook
::
[
Int
]
->
Int
->
EventBook
emptyEventBook
ns
n
=
wordToBook
ns
n
" "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment