Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
163
Issues
163
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
020e78de
Commit
020e78de
authored
Mar 12, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW.GRAPH.METRICS] spec gen inc exc.
parent
07305554
Pipeline
#267
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
103 additions
and
125 deletions
+103
-125
Node.hs
src/Gargantext/API/Node.hs
+12
-10
Flow.hs
src/Gargantext/Database/Flow.hs
+4
-8
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+27
-21
List.hs
src/Gargantext/Text/List.hs
+15
-24
Metrics.hs
src/Gargantext/Text/Metrics.hs
+45
-62
No files found.
src/Gargantext/API/Node.hs
View file @
020e78de
...
...
@@ -282,8 +282,8 @@ graphAPI nId = do
]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let
cId
=
maybe
(
panic
"no parentId"
)
identity
$
_node_parentId
nodeGraph
lId
<-
defaultList
cId
lId
<-
defaultList
cId
ngs
<-
filterListWithRoot
GraphTerm
<$>
mapTermListRoot
[
lId
]
NgramsTerms
myCooc
<-
Map
.
filter
(
>
1
)
<$>
getCoocByNgrams
...
...
@@ -296,7 +296,7 @@ graphAPI nId = do
instance
HasNodeError
ServantErr
where
_NodeError
=
prism'
mk
(
const
Nothing
)
-- $ panic "HasNodeError ServantErr: not a prism")
where
e
=
"NodeError: "
e
=
"
Gargantext.
NodeError: "
mk
NoListFound
=
err404
{
errBody
=
e
<>
"No list found"
}
mk
NoRootFound
=
err404
{
errBody
=
e
<>
"No Root found"
}
mk
NoCorpusFound
=
err404
{
errBody
=
e
<>
"No Corpus found"
}
...
...
@@ -333,18 +333,20 @@ rename nId (RenameNode name') = U.update (U.Rename nId name')
getTable
::
NodeId
->
Maybe
TabType
->
Maybe
Offset
->
Maybe
Limit
->
Maybe
OrderBy
->
Cmd
err
[
FacetDoc
]
getTable
cId
ft
o
l
order
=
case
ft
of
(
Just
Docs
)
->
runViewDocuments
cId
False
o
l
order
(
Just
Trash
)
->
runViewDocuments
cId
True
o
l
order
_
->
panic
"not implemented"
getTable
cId
ft
o
l
order
=
case
ft
of
(
Just
Docs
)
->
runViewDocuments
cId
False
o
l
order
(
Just
Trash
)
->
runViewDocuments
cId
True
o
l
order
_
->
panic
"not implemented"
getPairing
::
ContactId
->
Maybe
TabType
->
Maybe
Offset
->
Maybe
Limit
->
Maybe
OrderBy
->
Cmd
err
[
FacetDoc
]
getPairing
cId
ft
o
l
order
=
case
ft
of
(
Just
Docs
)
->
runViewAuthorsDoc
cId
False
o
l
order
(
Just
Trash
)
->
runViewAuthorsDoc
cId
True
o
l
order
_
->
panic
"not implemented"
getPairing
cId
ft
o
l
order
=
case
ft
of
(
Just
Docs
)
->
runViewAuthorsDoc
cId
False
o
l
order
(
Just
Trash
)
->
runViewAuthorsDoc
cId
True
o
l
order
_
->
panic
"not implemented"
getChart
::
NodeId
->
Maybe
UTCTime
->
Maybe
UTCTime
...
...
src/Gargantext/Database/Flow.hs
View file @
020e78de
...
...
@@ -83,6 +83,7 @@ flowCorpus u cn ff fp = do
ids
<-
flowCorpusMaster
ff
fp
flowCorpusUser
u
cn
ids
-- TODO query with complex query
flowCorpusSearchInDatabase
::
FlowCmdM
env
ServantErr
m
=>
Username
->
Text
->
m
CorpusId
flowCorpusSearchInDatabase
u
q
=
do
...
...
@@ -112,11 +113,7 @@ flowCorpusUser userName corpusName ids = do
-- User List Flow
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMkRootWithCorpus
userMaster
""
-- /!\ this extract NgramsTerms Only
ngs
<-
buildNgramsLists
userCorpusId
masterCorpusId
--printDebug "ngs" ngs
--TODO getNgramsElement of NgramsType...
--ngs <- getNgramsElementsWithParentNodeId masterCorpusId
ngs
<-
buildNgramsLists
userCorpusId
masterCorpusId
userListId
<-
flowList
userId
userCorpusId
ngs
printDebug
"userListId"
userListId
...
...
@@ -124,11 +121,10 @@ flowCorpusUser userName corpusName ids = do
_
<-
mkGraph
userCorpusId
userId
-- User Dashboard Flow
_
<-
mkDashboard
userCorpusId
userId
--
_ <- mkDashboard userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
pure
userCorpusId
...
...
@@ -142,7 +138,7 @@ insertMasterDocs hs = do
ids
<-
insertDocuments
masterUserId
masterCorpusId
NodeDocument
hyperdataDocuments'
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
toInsert
hs
)
docsWithNgrams
<-
documentIdWithNgrams
extractNgramsT
documentsWithId
docsWithNgrams
<-
documentIdWithNgrams
extractNgramsT
documentsWithId
let
maps
=
mapNodeIdNgrams
docsWithNgrams
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
020e78de
...
...
@@ -24,13 +24,12 @@ import Data.Set (Set)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Types.Node
-- (ListId, CorpusId, NodeId)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.TFICF
import
Gargantext.Text.Terms.Mono.Stem
(
stem
)
...
...
@@ -54,7 +53,7 @@ ngramsGroup l n = Text.intercalate " "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
->
[
(
Text
,
(
Double
,
Set
Text
))]
->
[
(
Text
,
(
Double
,
Set
Text
))]
sortTficf
=
List
.
sortOn
(
fst
.
snd
)
.
toList
...
...
@@ -69,8 +68,8 @@ getTficf' u m f = do
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
...
...
@@ -105,17 +104,20 @@ groupNodesByNgramsWith f m =
$
toList
m
------------------------------------------------------------------------
getNodesByNgramsUser
::
CorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsUser
cId
nt
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeUser
cId
nt
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId
nt
=
runPGSQuery
queryNgramsByNodeUser
(
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
)
getNodesByNgramsUser
::
CorpusId
->
NgramsType
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsUser
cId
nt
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeUser
cId
nt
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId
nt
=
runPGSQuery
queryNgramsByNodeUser
(
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
)
queryNgramsByNodeUser
::
DPS
.
Query
queryNgramsByNodeUser
=
[
sql
|
...
...
@@ -137,13 +139,17 @@ getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnly
cId
nt
ngs
=
Map
.
map
Set
.
size
<$>
getNodesByNgramsOnlyUser
cId
nt
ngs
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
nt
ngs
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
nt
ngs
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
selectNgramsOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
DPS
.
Only
$
Values
fields
tms'
)
selectNgramsOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
DPS
.
Only
$
Values
fields
tms'
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
,
"int4"
,
"int4"
]
tms'
=
map
(
\
t
->
(
t
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
))
tms
...
...
src/Gargantext/Text/List.hs
View file @
020e78de
{-|
Module : Gargantext.Text.Ngrams.Lists
Description :
Description :
Tools to build lists
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
module
Gargantext.Text.List
where
import
Data.Set
(
Set
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Gargantext.API.Ngrams
(
NgramsElement
,
mkNgramsElement
,
RootParent
(
..
),
mSetFromList
)
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -30,11 +26,11 @@ import Gargantext.Database.Metrics.NgramsByNode (getTficf', sortTficf, ngramsGro
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
import
qualified
Data.Char
as
Char
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.List
as
List
import
qualified
Data.Char
as
Char
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
UserCorpusId
->
MasterCorpusId
...
...
@@ -56,20 +52,19 @@ buildNgramsOthersList uCid groupIt nt = do
)
]
-- TODO remove hard coded parameters
buildNgramsTermsList
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
2
)
--printDebug "candidate" (length candidates)
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
(
ngramsGroup
EN
2
)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
--printDebug "termlist" (length termList)
let
termList
=
toTermList
(
isStopTerm
.
fst
)
candidates
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
toNgramsElement
::
(
ListType
,
(
Text
,
(
Double
,
Set
Text
)))
->
[
NgramsElement
]
...
...
@@ -87,10 +82,7 @@ toNgramsElement (listType, (_stem, (_score, setNgrams))) =
(
mSetFromList
[]
)
)
children
-- TODO remove hard coded parameters
toTermList
::
(
a
->
Bool
)
->
[
a
]
->
[(
ListType
,
a
)]
toTermList
stop
ns
=
map
(
toTermList'
stop
CandidateTerm
)
xs
<>
map
(
toTermList'
stop
GraphTerm
)
ys
...
...
@@ -106,8 +98,8 @@ toTermList stop ns = map (toTermList' stop CandidateTerm) xs
ys
=
take
b
$
drop
a
ns
zs
=
drop
b
$
drop
a
ns
a
=
1
b
=
10
000
a
=
1
0
b
=
3
000
isStopTerm
::
Text
->
Bool
isStopTerm
x
=
Text
.
length
x
<
3
...
...
@@ -118,4 +110,3 @@ isStopTerm x = Text.length x < 3
.
Text
.
replace
"/"
""
)
x
src/Gargantext/Text/Metrics.hs
View file @
020e78de
...
...
@@ -22,7 +22,7 @@ module Gargantext.Text.Metrics
--import Debug.Trace (trace)
--import Math.KMeans (kmeans, euclidSq, elements)
import
Data.Map
(
Map
)
import
Data.
Ord
(
Down
(
..
)
)
import
Data.
List.Extra
(
sortOn
)
import
GHC.Real
(
round
)
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Distances.Matrice
...
...
@@ -32,65 +32,14 @@ import qualified Data.Array.Accelerate.Interpreter as DAA
import
qualified
Data.List
as
L
import
qualified
Data.Map
as
M
data
MapListSize
=
MapListSize
Int
data
InclusionSize
=
InclusionSize
Int
data
SampleBins
=
SampleBins
Double
data
Clusters
=
Clusters
Int
data
DefaultValue
=
DefaultValue
Int
data
FilterConfig
=
FilterConfig
{
fc_mapListSize
::
MapListSize
,
fc_inclusionSize
::
InclusionSize
,
fc_sampleBins
::
SampleBins
,
fc_clusters
::
Clusters
,
fc_defaultValue
::
DefaultValue
}
filterCooc
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc
fc
cc
=
(
filterCooc'
fc
)
ts
cc
where
ts
=
map
_scored_terms
$
takeSome
fc
$
coocScored
cc
filterCooc'
::
(
Show
t
,
Ord
t
)
=>
FilterConfig
->
[
t
]
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc'
(
FilterConfig
_
_
_
_
(
DefaultValue
dv
))
ts
m
=
-- trace ("coocScored " <> show ts) $
foldl'
(
\
m'
k
->
M
.
insert
k
(
maybe
dv
identity
$
M
.
lookup
k
m
)
m'
)
M
.
empty
(
listToCombi
identity
ts
)
-- | Map list creation
-- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
-- Sample the main cluster ordered by specificity/genericity in (SampleBins::Double) parts
-- each parts is then ordered by Inclusion/Exclusion
-- take n scored terms in each parts where n * SampleBins = MapListSize.
takeSome
::
Ord
t
=>
FilterConfig
->
[
Scored
t
]
->
[
Scored
t
]
takeSome
(
FilterConfig
(
MapListSize
l
)
(
InclusionSize
l'
)
(
SampleBins
s
)
(
Clusters
_
)
_
)
scores
=
L
.
take
l
$
takeSample
n
m
$
L
.
take
l'
$
reverse
$
sortWith
(
Down
.
_scored_incExc
)
scores
-- splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
--splitKmeans x xs = L.concat $ map elements
-- $ V.take (k-1)
-- $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
-- euclidSq x xs
n
=
round
((
fromIntegral
l
)
/
s
)
m
=
round
$
(
fromIntegral
$
length
scores
)
/
(
s
)
takeSample
n'
m'
xs
=
-- trace ("splitKmeans " <> show (length xs)) $
L
.
concat
$
map
(
L
.
take
n'
)
$
map
(
sortWith
(
Down
.
_scored_incExc
))
-- TODO use kmeans s instead of splitEvery
-- in order to split in s heteregenous parts
-- without homogeneous order hypothesis
$
splitEvery
m'
$
sortWith
(
Down
.
_scored_speGen
)
xs
type
GraphListSize
=
Int
type
InclusionSize
=
Int
takeScored
::
Ord
t
=>
GraphListSize
->
InclusionSize
->
Map
(
t
,
t
)
Int
->
[
t
]
takeScored
listSize
incSize
=
map
_scored_terms
.
linearTakes
listSize
incSize
_scored_speGen
_scored_incExc
.
scored
data
Scored
ts
=
Scored
{
_scored_terms
::
!
ts
...
...
@@ -98,11 +47,45 @@ data Scored ts = Scored
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
coocS
cored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
coocS
cored
m
=
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
s
cored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
s
cored
m
=
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
where
(
ti
,
fi
)
=
createIndices
m
(
is
,
ss
)
=
incExcSpeGen
$
cooc2mat
ti
m
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
-- | Filter Scored data
-- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
-- [(3,8),(6,5)]
linearTakes
::
(
Ord
b1
,
Ord
b2
)
=>
GraphListSize
->
InclusionSize
->
(
a
->
b2
)
->
(
a
->
b1
)
->
[
a
]
->
[
a
]
linearTakes
gls
incSize
speGen
incExc
=
take
gls
.
L
.
concat
.
map
(
take
$
round
$
(
fromIntegral
gls
::
Double
)
/
(
fromIntegral
incSize
::
Double
)
)
.
map
(
sortOn
incExc
)
.
splitEvery
incSize
.
sortOn
speGen
-- | Filters
{- splitKmeans k scores
TODO: benchmark with accelerate-example kmeans version
splitKmeans x xs = L.concat $ map elements
$ V.take (k-1)
$ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
euclidSq x xs
-- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
-- Sample the main cluster ordered by specificity/genericity in (SampleBins::Double) parts
-- each parts is then ordered by Inclusion/Exclusion
-- take n scored terms in each parts where n * SampleBins = MapListSize.
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment