Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
163
Issues
163
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
485666a2
Commit
485666a2
authored
Apr 01, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[LEARN] Grid Search improved.
parent
06aa56b6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
116 additions
and
65 deletions
+116
-65
Node.hs
src/Gargantext/API/Node.hs
+4
-6
Flow.hs
src/Gargantext/Database/Flow.hs
+2
-2
Lists.hs
src/Gargantext/Database/Lists.hs
+35
-9
Metrics.hs
src/Gargantext/Database/Metrics.hs
+0
-1
Root.hs
src/Gargantext/Database/Root.hs
+1
-0
List.hs
src/Gargantext/Text/List.hs
+21
-8
Learn.hs
src/Gargantext/Text/List/Learn.hs
+53
-39
No files found.
src/Gargantext/API/Node.hs
View file @
485666a2
...
...
@@ -60,7 +60,7 @@ import Gargantext.Database.Types.Node (CorpusId, ContactId)
import
Gargantext.Database.Utils
-- (Cmd, CmdM)
import
Gargantext.Prelude
import
Gargantext.API.Settings
import
Gargantext.Text.Metrics
import
Gargantext.Text.Metrics
(
Scored
(
..
))
import
Gargantext.Viz.Graph
hiding
(
Node
)
-- (Graph(_graph_metadata),LegendField(..), GraphMetadata(..),readGraphFromJson,defaultGraph)
import
Gargantext.Viz.Graph.Tools
(
cooc2graph
)
import
Servant
...
...
@@ -69,7 +69,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import
qualified
Data.Map
as
Map
import
qualified
Gargantext.Database.Node.Update
as
U
(
update
,
Update
(
..
))
{-
-
{-
import qualified Gargantext.Text.List.Learn as Learn
import qualified Data.Vector as Vec
--}
...
...
@@ -408,10 +408,8 @@ getMetrics cId maybeListId tabType maybeLimit = do
listType
t
m
=
maybe
(
panic
errorMsg
)
fst
$
Map
.
lookup
t
m
errorMsg
=
"API.Node.metrics: key absent"
{-
let metrics' = Map.fromListWith (<>) $ map (\(Metric _ s1 s2 lt) -> (lt, [Vec.fromList [s1,s2]])) metrics
_ <- Learn.grid metrics'
--}
pure
$
Metrics
metrics
src/Gargantext/Database/Flow.hs
View file @
485666a2
...
...
@@ -60,7 +60,7 @@ import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), N
import
Gargantext.Database.Utils
(
Cmd
,
CmdM
)
import
Gargantext.Ext.IMT
(
toSchoolName
)
import
Gargantext.Prelude
import
Gargantext.Text.List
(
buildNgramsLists
)
import
Gargantext.Text.List
(
buildNgramsLists
,
StopSize
(
..
)
)
--import Gargantext.Text.Parsers (parseDocs, FileFormat)
import
Gargantext.Text.Terms
(
TermType
(
..
),
tt_lang
)
import
Gargantext.Text.Terms
(
extractTerms
)
...
...
@@ -127,7 +127,7 @@ flowCorpusUser l userName corpusName ids = do
-- User List Flow
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
ngs
<-
buildNgramsLists
l
2
3
userCorpusId
masterCorpusId
ngs
<-
buildNgramsLists
l
2
3
(
StopSize
3
)
userCorpusId
masterCorpusId
userListId
<-
flowList
userId
userCorpusId
ngs
printDebug
"userListId"
userListId
...
...
src/Gargantext/Database/Lists.hs
View file @
485666a2
...
...
@@ -25,20 +25,29 @@ Portability : POSIX
module
Gargantext.Database.Lists
where
import
Control.Arrow
(
returnA
)
--import Control.Arrow (returnA)
--import Gargantext.API.Metrics
--import Gargantext.Core.Types.Individu (Username)
--import Gargantext.Database.Config (nodeTypeId)
--import Gargantext.Database.Schema.Node -- (HasNodeError, queryNodeTable)
--import Gargantext.Database.Schema.User -- (queryUserTable)
--import Gargantext.Database.Utils
--import Opaleye hiding (FromField)
--import Opaleye.Internal.QueryArr (Query)
import
Gargantext.API.Ngrams
(
TabType
(
..
))
import
Gargantext.Core.Types
-- (NodePoly(..), NodeCorpus, ListId)
import
Gargantext.Core.Types.Individu
(
Username
)
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Node
-- (HasNodeError, queryNodeTable)
import
Gargantext.Database.Schema.User
-- (queryUserTable)
import
Gargantext.Database.Utils
import
Gargantext.Database.Flow
(
FlowCmdM
)
import
Gargantext.Prelude
hiding
(
sum
,
head
)
import
Opaleye
hiding
(
FromField
)
import
Opaleye.Internal.QueryArr
(
Query
)
import
Gargantext.Text.Metrics
(
Scored
(
..
))
import
Prelude
hiding
(
null
,
id
,
map
,
sum
)
import
Servant
(
ServantErr
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
Vec
import
qualified
Gargantext.Database.Metrics
as
Metrics
-- | To get all lists of a user
-- /!\ lists of different types of corpora (Annuaire or Documents)
{-
listsWith :: HasNodeError err => Username -> Cmd err [Maybe ListId]
listsWith u = runOpaQuery (selectLists u)
where
...
...
@@ -53,7 +62,6 @@ listsWithJoin2 = leftJoin queryUserTable queryNodeTable cond12
where
cond12 (u,n) = user_id u .== _node_userId n
{-
listsWithJoin3 :: Query (NodeRead, (UserRead, NodeReadNull))
listsWithJoin3 = leftJoin3 queryUserTable queryNodeTable queryNodeTable cond12 cond23
where
...
...
@@ -61,5 +69,23 @@ listsWithJoin3 = leftJoin3 queryUserTable queryNodeTable queryNodeTable cond12 c
cond12 (u,n) = user_id u .== _node_userId n
cond23 :: (NodeRead, (UserRead, NodeReadNull)) -> Column PGBool
cond23 (n1,(u,n2)) = (toNullable $ _node_id n1) .== _node_parentId n2
--}
learnMetrics'
::
FlowCmdM
env
ServantErr
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Int
->
m
(
Map
.
Map
ListType
[
Vec
.
Vector
Double
])
learnMetrics'
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
scores
)
<-
Metrics
.
getMetrics'
cId
maybeListId
tabType
maybeLimit
let
metrics
=
map
(
\
(
Scored
t
s1
s2
)
->
(
listType
t
ngs'
,
[
Vec
.
fromList
[
s1
,
s2
]]))
scores
listType
t
m
=
maybe
(
panic
errorMsg
)
fst
$
Map
.
lookup
t
m
errorMsg
=
"API.Node.metrics: key absent"
{-
_ <- Learn.grid 100 110 metrics' metrics'
--}
pure
$
Map
.
fromListWith
(
<>
)
metrics
src/Gargantext/Database/Metrics.hs
View file @
485666a2
...
...
@@ -68,7 +68,6 @@ getLocalMetrics cId maybeListId tabType maybeLimit = do
pure
(
ngs
,
ngs'
,
localMetrics
myCooc
)
getNgramsCooc
::
(
FlowCmdM
env
ServantErr
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
->
m
(
Map
Text
(
ListType
,
Maybe
Text
)
...
...
src/Gargantext/Database/Root.hs
View file @
485666a2
...
...
@@ -50,3 +50,4 @@ selectRoot username = proc () -> do
restrict
-<
_node_userId
row
.==
(
user_id
users
)
returnA
-<
row
src/Gargantext/Text/List.hs
View file @
485666a2
...
...
@@ -25,6 +25,7 @@ import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import
Gargantext.Database.Metrics.NgramsByNode
(
getTficf'
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Text.List.Learn
(
Model
(
..
))
import
Gargantext.Prelude
--import Gargantext.Text.Terms (TermType(..))
import
qualified
Data.Char
as
Char
...
...
@@ -33,11 +34,23 @@ import qualified Data.Map as Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
data
NgramsListBuilder
=
BuilderStepO
{
stemSize
::
Int
,
stemX
::
Int
,
stopSize
::
Int
}
|
BuilderStep1
{
withModel
::
Model
}
|
BuilderStepN
{
withModel
::
Model
}
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
uCid
mCid
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
...
@@ -54,13 +67,13 @@ buildNgramsOthersList uCid groupIt nt = do
]
-- TODO remove hard coded parameters
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
uCid
mCid
=
do
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
let
termList
=
toTermList
(
(
isStopTerm
s
)
.
fst
)
candidates
--let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList)
...
...
@@ -103,7 +116,7 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
a
=
3
b
=
400
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
||
any
isStopChar
(
Text
.
unpack
x
)
isStopTerm
::
StopSize
->
Text
->
Bool
isStopTerm
(
StopSize
n
)
x
=
Text
.
length
x
<
n
||
any
isStopChar
(
Text
.
unpack
x
)
where
isStopChar
c
=
not
(
c
`
elem
`
(
"- /()"
::
[
Char
])
||
Char
.
isAlpha
c
)
src/Gargantext/Text/List/Learn.hs
View file @
485666a2
...
...
@@ -57,21 +57,25 @@ trainList x y = (train x y) . trainList'
vecs2maps
=
map
(
IntMap
.
fromList
.
(
zip
[
1
..
])
.
Vec
.
toList
)
predictList
::
SVM
.
Model
->
[
Vec
.
Vector
Double
]
->
IO
[
Maybe
ListType
]
predictList
m
vs
=
map
(
fromListTypeId
.
round
)
<$>
predict
m
vs
predictList
::
Model
->
[
Vec
.
Vector
Double
]
->
IO
[
Maybe
ListType
]
predictList
(
ModelSVM
m
_
_
)
vs
=
map
(
fromListTypeId
.
round
)
<$>
predict
m
vs
------------------------------------------------------------------------
data
Model
=
ModelSVM
{
model
::
SVM
.
Model
}
data
Model
=
ModelSVM
{
modelSVM
::
SVM
.
Model
,
param1
::
Maybe
Double
,
param2
::
Maybe
Double
}
--{-
instance
SaveFile
Model
where
saveFile'
fp
(
ModelSVM
m
)
=
SVM
.
saveModel
m
fp
saveFile'
fp
(
ModelSVM
m
_
_
)
=
SVM
.
saveModel
m
fp
instance
ReadFile
Model
where
readFile'
fp
=
do
m
<-
SVM
.
loadModel
fp
pure
$
ModelSVM
m
pure
$
ModelSVM
m
Nothing
Nothing
--}
------------------------------------------------------------------------
-- | TODO
-- shuffle list
...
...
@@ -80,43 +84,53 @@ instance ReadFile Model
type
Train
=
Map
ListType
[
Vec
.
Vector
Double
]
type
Tests
=
Map
ListType
[
Vec
.
Vector
Double
]
type
Score
=
Double
type
Param
=
Double
grid
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
)
=>
(
Train
,
Tests
)
->
m
()
-- Map (ListType, Maybe ListType) Int
)
grid
(
m
,
_
)
=
do
=>
Param
->
Param
->
Train
->
[
Tests
]
->
m
(
Maybe
Model
)
grid
s
e
tr
te
=
do
let
grid'
::
(
MonadReader
env
m
,
MonadIO
m
,
HasSettings
env
)
=>
Double
->
Double
->
Map
ListType
[
Vec
.
Vector
Double
]
->
m
(
Double
,
(
Double
,
Double
))
grid'
x
y
ls
=
do
model'
<-
liftIO
$
trainList
x
y
ls
--fp <- saveFile (ModelSVM model')
--printDebug "file" fp
let
(
res
,
toGuess
)
=
List
.
unzip
$
List
.
concat
$
map
(
\
(
k
,
vs
)
->
zip
(
repeat
k
)
vs
)
$
Map
.
toList
ls
res'
<-
liftIO
$
predictList
model'
toGuess
pure
(
score''
$
score'
$
List
.
zip
res
res'
,
(
x
,
y
))
{-
score :: [(ListType, Maybe ListType)] -> Map (ListType, Maybe ListType) Int
score = occurrencesWith identity
-}
score'
::
[(
ListType
,
Maybe
ListType
)]
->
Map
(
Maybe
Bool
)
Int
score'
=
occurrencesWith
(
\
(
a
,
b
)
->
(
==
)
<$>
Just
a
<*>
b
)
score''
::
Map
(
Maybe
Bool
)
Int
->
Double
score''
m''
=
maybe
0
(
\
t
->
(
fromIntegral
t
)
/
total
)
(
Map
.
lookup
(
Just
True
)
m''
)
where
total
=
fromIntegral
$
foldl
(
+
)
0
$
Map
.
elems
m''
r
<-
List
.
take
10
.
List
.
reverse
.
(
List
.
sortOn
fst
)
<$>
mapM
(
\
(
x
,
y
)
->
grid'
x
y
m
)
[(
x
,
y
)
|
x
<-
[
500
..
510
],
y
<-
[
500
..
510
]]
printDebug
"GRID SEARCH"
r
-- save best result
->
Train
->
[
Tests
]
->
m
(
Score
,
Model
)
grid'
x
y
tr'
te'
=
do
model''
<-
liftIO
$
trainList
x
y
tr'
let
model'
=
ModelSVM
model''
(
Just
x
)
(
Just
y
)
score'
::
[(
ListType
,
Maybe
ListType
)]
->
Map
(
Maybe
Bool
)
Int
score'
=
occurrencesWith
(
\
(
a
,
b
)
->
(
==
)
<$>
Just
a
<*>
b
)
score''
::
Map
(
Maybe
Bool
)
Int
->
Double
score''
m''
=
maybe
0
(
\
t
->
(
fromIntegral
t
)
/
total
)
(
Map
.
lookup
(
Just
True
)
m''
)
where
total
=
fromIntegral
$
foldl
(
+
)
0
$
Map
.
elems
m''
getScore
m
t
=
do
let
(
res
,
toGuess
)
=
List
.
unzip
$
List
.
concat
$
map
(
\
(
k
,
vs
)
->
zip
(
repeat
k
)
vs
)
$
Map
.
toList
t
res'
<-
liftIO
$
predictList
m
toGuess
pure
$
score''
$
score'
$
List
.
zip
res
res'
score
<-
mapM
(
getScore
model'
)
te'
pure
(
mean
score
,
model'
)
r
<-
head
.
List
.
reverse
.
(
List
.
sortOn
fst
)
<$>
mapM
(
\
(
x
,
y
)
->
grid'
x
y
tr
te
)
[(
x
,
y
)
|
x
<-
[
s
..
e
],
y
<-
[
s
..
e
]]
printDebug
"GRID SEARCH"
(
map
fst
r
)
--printDebug "file" fp
--fp <- saveFile (ModelSVM model')
--save best result
pure
$
snd
<$>
r
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment