Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
dc1e008d
Commit
dc1e008d
authored
Jul 22, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Learn function to export models
parent
61e8d712
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
98 additions
and
42 deletions
+98
-42
Flow.hs
src/Gargantext/Database/Flow.hs
+2
-1
Learn.hs
src/Gargantext/Database/Learn.hs
+67
-25
Prelude.hs
src/Gargantext/Prelude.hs
+3
-0
Utils.hs
src/Gargantext/Prelude/Utils.hs
+2
-2
GrandDebat.hs
src/Gargantext/Text/Corpus/Parsers/GrandDebat.hs
+7
-12
Learn.hs
src/Gargantext/Text/Learn.hs
+17
-2
No files found.
src/Gargantext/Database/Flow.hs
View file @
dc1e008d
...
@@ -71,6 +71,7 @@ import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
...
@@ -71,6 +71,7 @@ import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import
qualified
Gargantext.Text.Corpus.API.Isidore
as
Isidore
import
qualified
Gargantext.Text.Corpus.API.Isidore
as
Isidore
import
Gargantext.Text.Terms
(
TermType
(
..
),
tt_lang
,
extractTerms
,
uniText
)
import
Gargantext.Text.Terms
(
TermType
(
..
),
tt_lang
,
extractTerms
,
uniText
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Prelude.Utils
hiding
(
hash
)
import
System.FilePath
(
FilePath
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.List
as
List
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -124,7 +125,7 @@ flowCorpusDebat :: FlowCmdM env err m
...
@@ -124,7 +125,7 @@ flowCorpusDebat :: FlowCmdM env err m
flowCorpusDebat
u
n
l
fp
=
do
flowCorpusDebat
u
n
l
fp
=
do
docs
<-
liftIO
(
splitEvery
500
docs
<-
liftIO
(
splitEvery
500
<$>
take
l
<$>
take
l
<$>
GD
.
readFile
fp
<$>
readFile'
fp
::
IO
[[
GD
.
GrandDebatReference
]]
::
IO
[[
GD
.
GrandDebatReference
]]
)
)
flowCorpus
u
n
(
Multi
FR
)
(
map
(
map
toHyperdataDocument
)
docs
)
flowCorpus
u
n
(
Multi
FR
)
(
map
(
map
toHyperdataDocument
)
docs
)
...
...
src/Gargantext/Database/Learn.hs
View file @
dc1e008d
...
@@ -13,6 +13,8 @@ Portability : POSIX
...
@@ -13,6 +13,8 @@ Portability : POSIX
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module
Gargantext.Database.Learn
where
module
Gargantext.Database.Learn
where
...
@@ -27,42 +29,82 @@ import qualified Data.List as List
...
@@ -27,42 +29,82 @@ import qualified Data.List as List
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
Gargantext.Database.Schema.NodeNode
(
nodesToFavorite
)
import
Gargantext.Database.Schema.NodeNode
(
nodesToFavorite
)
import
Gargantext.API.Node
(
delDocs
,
Documents
(
..
))
import
Gargantext.API.Node
(
delDocs
,
Documents
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
--import Gargantext.Database.Utils (Cmd)
import
Gargantext.Database.Schema.Node
(
HasNodeError
)
--import Gargantext.Database.Schema.Node (HasNodeError)
import
Gargantext.API
import
Gargantext.API.Settings
import
Gargantext.Database.Flow
(
FlowCmdM
)
text
::
FacetDoc
->
(
NodeId
,
Text
)
data
FavOrTrash
=
IsFav
|
IsTrash
text
(
FacetDoc
nId
_
_
h
_
_
)
=
(
nId
,
title
<>
""
<>
Text
.
take
100
abstr
)
where
title
=
maybe
""
identity
(
_hyperdataDocument_title
h
)
abstr
=
maybe
""
identity
(
_hyperdataDocument_abstract
h
)
--moreLike docs_fav docs_trash docs_test = do
data
FavTrash
=
IsFav
|
IsTrash
deriving
(
Eq
)
deriving
(
Eq
)
moreLike
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
Cmd
err
[(
NodeId
,
Maybe
Bool
)]
--moreLike :: FlowCmdM env error m => FavOrTrash -> CorpusId -> m (Events Bool, [FacetDoc])
moreLike
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
[
FacetDoc
]
moreLike
ft
cId
=
do
moreLike
ft
cId
=
do
let
b
=
if
(
==
)
ft
IsFav
then
True
else
False
priors
<-
getPriors
ft
cId
moreLikeWith
priors
ft
cId
---------------------------------------------------------------------------
getPriors
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
(
Events
Bool
)
getPriors
ft
cId
=
do
docs_trash
<-
runViewDocuments
cId
True
Nothing
Nothing
Nothing
docs_trash
<-
map
text
<$>
runViewDocuments
cId
True
Nothing
Nothing
Nothing
docs_fav
<-
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
True
)
docs_fav
<-
map
text
<$>
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
True
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
docs_test
<-
map
text
<$>
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
False
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
let
priors
=
priorEventsWith
snd
b
(
List
.
zip
(
repeat
False
)
docs_fav
<>
List
.
zip
(
repeat
True
)
docs_trash
let
priors
=
priorEventsWith
text
(
fav2bool
ft
)
(
List
.
zip
(
repeat
False
)
docs_fav
)
<>
List
.
zip
(
repeat
True
)
docs_trash
)
pure
priors
moreLikeWith
::
FlowCmdM
DevEnv
GargError
m
=>
Events
Bool
->
FavOrTrash
->
CorpusId
->
m
[
FacetDoc
]
moreLikeWith
priors
ft
cId
=
do
pure
$
filter
((
==
)
(
Just
$
not
b
)
.
snd
)
$
map
(
\
x
->
(
fst
x
,
detectDefaultWithPriors
snd
priors
x
))
docs_test
docs_test
<-
filter
(
\
(
FacetDoc
_
_
_
_
f
_
)
->
f
==
False
)
<$>
runViewDocuments
cId
False
Nothing
Nothing
Nothing
learnModify
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
[
NodeId
]
->
Cmd
err
[
Int
]
let
results
=
map
fst
learnModify
favTrash
cId
ns
=
case
favTrash
of
$
filter
((
==
)
(
Just
$
not
$
fav2bool
ft
)
.
snd
)
$
map
(
\
f
->
(
f
,
detectDefaultWithPriors
text
priors
f
))
docs_test
pure
results
---------------------------------------------------------------------------
fav2bool
::
FavOrTrash
->
Bool
fav2bool
ft
=
if
(
==
)
ft
IsFav
then
True
else
False
text
::
FacetDoc
->
Text
text
(
FacetDoc
_
_
_
h
_
_
)
=
title
<>
""
<>
Text
.
take
100
abstr
where
title
=
maybe
""
identity
(
_hyperdataDocument_title
h
)
abstr
=
maybe
""
identity
(
_hyperdataDocument_abstract
h
)
---------------------------------------------------------------------------
apply
::
(
FlowCmdM
DevEnv
GargError
m
)
=>
FavOrTrash
->
CorpusId
->
[
NodeId
]
->
m
[
Int
]
apply
favTrash
cId
ns
=
case
favTrash
of
IsFav
->
nodesToFavorite
$
map
(
\
n
->
(
cId
,
n
,
True
))
ns
IsFav
->
nodesToFavorite
$
map
(
\
n
->
(
cId
,
n
,
True
))
ns
IsTrash
->
delDocs
cId
(
Documents
ns
)
IsTrash
->
delDocs
cId
(
Documents
ns
)
learnAndApply
::
HasNodeError
err
=>
FavTrash
->
CorpusId
->
Cmd
err
[
Int
]
learnAndApply
ft
cId
=
do
ids
<-
map
fst
<$>
moreLike
ft
cId
learnModify
ft
cId
ids
moreLikeAndApply
::
FlowCmdM
DevEnv
GargError
m
=>
FavOrTrash
->
CorpusId
->
m
[
Int
]
moreLikeAndApply
ft
cId
=
do
priors
<-
getPriors
ft
cId
moreLikeWithAndApply
priors
ft
cId
moreLikeWithAndApply
::
FlowCmdM
DevEnv
GargError
m
=>
Events
Bool
->
FavOrTrash
->
CorpusId
->
m
[
Int
]
moreLikeWithAndApply
priors
ft
cId
=
do
ids
<-
map
facetDoc_id
<$>
moreLikeWith
priors
ft
cId
apply
ft
cId
ids
src/Gargantext/Prelude.hs
View file @
dc1e008d
...
@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay
...
@@ -304,3 +304,6 @@ tail' = listSafeN "tail" tailMay
init'
::
Text
->
[
a
]
->
[
a
]
init'
::
Text
->
[
a
]
->
[
a
]
init'
=
listSafeN
"init"
initMay
init'
=
listSafeN
"init"
initMay
------------------------------------------------------------------------
src/Gargantext/Prelude/Utils.hs
View file @
dc1e008d
...
@@ -54,9 +54,9 @@ class ReadFile a where
...
@@ -54,9 +54,9 @@ class ReadFile a where
readFile'
::
FilePath
->
IO
a
readFile'
::
FilePath
->
IO
a
sav
eFile
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
,
SaveFile
a
)
writ
eFile
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
,
SaveFile
a
)
=>
a
->
m
FilePath
=>
a
->
m
FilePath
sav
eFile
a
=
do
writ
eFile
a
=
do
dataPath
<-
view
(
settings
.
fileFolder
)
<$>
ask
dataPath
<-
view
(
settings
.
fileFolder
)
<$>
ask
(
fp
,
fn
)
<-
liftIO
$
(
toPath
3
)
.
hash
.
Text
.
pack
.
show
<$>
newStdGen
(
fp
,
fn
)
<-
liftIO
$
(
toPath
3
)
.
hash
.
Text
.
pack
.
show
<$>
newStdGen
...
...
src/Gargantext/Text/Corpus/Parsers/GrandDebat.hs
View file @
dc1e008d
...
@@ -19,18 +19,17 @@ TODO: create a separate Lib.
...
@@ -19,18 +19,17 @@ TODO: create a separate Lib.
module
Gargantext.Text.Corpus.Parsers.GrandDebat
module
Gargantext.Text.Corpus.Parsers.GrandDebat
where
where
import
GHC.IO
(
FilePath
)
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
Data.Aeson
(
ToJSON
,
FromJSON
)
import
qualified
Data.JsonStream.Parser
as
P
--import Data.Either (either)
import
Data.Maybe
(
Maybe
())
import
Data.Maybe
(
Maybe
())
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
Text
import
qualified
Data.ByteString.Lazy
as
DBL
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude.Utils
import
qualified
Data.ByteString.Lazy
as
DBL
import
qualified
Data.JsonStream.Parser
as
P
import
qualified
Data.Text
as
Text
data
GrandDebatReference
=
GrandDebatReference
data
GrandDebatReference
=
GrandDebatReference
...
@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference
...
@@ -91,16 +90,12 @@ instance ToHyperdataDocument GrandDebatReference
True
->
r'
True
->
r'
False
->
""
False
->
""
class
ReadFile
a
where
readFile
::
FilePath
->
IO
a
instance
ReadFile
[
GrandDebatReference
]
instance
ReadFile
[
GrandDebatReference
]
where
where
-- | read json: 3 version below are working but with increased optimization
-- | read json: 3 version below are working but with increased optimization
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = maybe [] identity <$> decode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
--readFile fp = either (panic . Text.pack) identity <$> P.eitherDecode <$> DBL.readFile fp
readFile
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
readFile
'
fp
=
P
.
parseLazyByteString
(
P
.
arrayOf
P
.
value
)
<$>
DBL
.
readFile
fp
src/Gargantext/Text/Learn.hs
View file @
dc1e008d
...
@@ -18,24 +18,30 @@ Main type here is String.
...
@@ -18,24 +18,30 @@ Main type here is String.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module
Gargantext.Text.Learn
-- (detectLang, detectLangs, stopList)
module
Gargantext.Text.Learn
-- (detectLang, detectLangs, stopList)
where
where
--import Data.Char (toLower)
import
Codec.Serialise
import
qualified
Data.List
as
DL
import
qualified
Data.List
as
DL
import
Data.Maybe
(
maybe
)
import
Data.Maybe
(
maybe
)
import
Data.Map.Strict
(
Map
,
toList
)
import
Data.Map.Strict
(
Map
,
toList
)
import
qualified
Data.Map.Strict
as
DM
import
qualified
Data.Map.Strict
as
DM
import
GHC.Generics
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Text
(
pack
,
unpack
,
toLower
)
import
Data.Text
(
pack
,
unpack
,
toLower
)
import
Data.Tuple.Extra
(
both
)
import
Data.Tuple.Extra
(
both
)
import
qualified
Data.ByteString.Lazy
as
BSL
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Utils
import
Gargantext.Core
(
Lang
(
..
),
allLangs
)
import
Gargantext.Core
(
Lang
(
..
),
allLangs
)
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
...
@@ -172,7 +178,16 @@ toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
...
@@ -172,7 +178,16 @@ toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
data
EventBook
=
EventBook
{
events_freq
::
Map
String
Freq
data
EventBook
=
EventBook
{
events_freq
::
Map
String
Freq
,
events_n
::
Map
StringSize
TotalFreq
,
events_n
::
Map
StringSize
TotalFreq
}
}
deriving
(
Show
)
deriving
(
Show
,
Generic
)
instance
Serialise
EventBook
instance
(
Serialise
a
,
Ord
a
)
=>
SaveFile
(
Events
a
)
where
saveFile'
f
d
=
BSL
.
writeFile
f
(
serialise
d
)
instance
(
Serialise
a
,
Ord
a
)
=>
ReadFile
(
Events
a
)
where
readFile'
filepath
=
deserialise
<$>
BSL
.
readFile
filepath
emptyEventBook
::
[
Int
]
->
Int
->
EventBook
emptyEventBook
::
[
Int
]
->
Int
->
EventBook
emptyEventBook
ns
n
=
wordToBook
ns
n
" "
emptyEventBook
ns
n
=
wordToBook
ns
n
" "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment