Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
3b8711b9
Commit
3b8711b9
authored
Jan 20, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Graph][WIP] Cooc 2 graph and missing file.
parent
0f2abe5f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
155 additions
and
3 deletions
+155
-3
TFICF.hs
src/Gargantext/Database/Metrics/TFICF.hs
+145
-0
Flow.hs
src/Gargantext/Text/Flow.hs
+9
-2
Graph.hs
src/Gargantext/Viz/Graph.hs
+1
-1
No files found.
src/Gargantext/Database/Metrics/TFICF.hs
0 → 100644
View file @
3b8711b9
{-|
Module : Gargantext.Database.Metrics.TFICF
Description : Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TFICF, generalization of TFIDF
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Metrics.TFICF
where
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Safe
(
headMay
)
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
import
Gargantext.Prelude
import
Gargantext.Core.Types.Individu
(
UsernameMaster
)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Types.Node
(
ListId
,
CorpusId
,
NodeType
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTerms
,
NgramsType
,
ngramsTypeId
)
type
OccGlobal
=
Double
type
OccCorpus
=
Double
getTficf
::
UsernameMaster
->
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
[
Tficf
]
getTficf
u
cId
lId
ngType
=
do
g
<-
getTficfGlobal
u
c
<-
getTficfCorpus
cId
ngs
<-
getTficfNgrams
u
cId
lId
ngType
pure
$
map
(
\
(
nId
,
nTerms
,
wm
,
wn
)
->
Tficf
nId
nTerms
(
tficf
(
TficfCorpus
wn
(
fromIntegral
c
))
(
TficfLanguage
wm
(
fromIntegral
g
))
)
)
ngs
getTficfGlobal
::
UsernameMaster
->
Cmd
err
Int
getTficfGlobal
u
=
maybe
0
identity
<$>
headMay
<$>
map
(
\
(
DPS
.
Only
n
)
->
n
)
<$>
runPGSQuery
q
p
where
p
=
(
u
,
nodeTypeId
NodeDocument
)
q
=
[
sql
|
SELECT count(*) from nodes n
JOIN auth_user a ON a.id = n.user_id
WHERE
a.username = ?
AND n.typename = ?
|]
getTficfCorpus
::
CorpusId
->
Cmd
err
Int
getTficfCorpus
cId
=
maybe
0
identity
<$>
headMay
<$>
map
(
\
(
DPS
.
Only
n
)
->
n
)
<$>
runPGSQuery
q
p
where
p
=
(
cId
,
nodeTypeId
NodeDocument
)
q
=
[
sql
|
WITH input(corpusId, typename) AS ((VALUES(?::"int4",?::"int4")))
SELECT count(*) from nodes_nodes AS nn
JOIN nodes AS n ON n.id = nn.node2_id
JOIN input ON nn.node1_id = input.corpusId
WHERE n.typename = input.typename;
|]
getTficfNgrams
::
UsernameMaster
->
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
[(
NgramsId
,
NgramsTerms
,
OccGlobal
,
OccCorpus
)]
getTficfNgrams
u
cId
lId
ngType
=
runPGSQuery
queryTficf
p
where
p
=
(
u
,
nodeTypeId
NodeList
,
nodeTypeId
NodeDocument
,
ngramsTypeId
ngType
,
cId
,
lId
)
queryTficf
::
DPS
.
Query
queryTficf
=
[
sql
|
-- TODO add CTE for groups
WITH input(masterUsername,typenameList,typenameDoc,ngramsTypeId,corpusId,listId)
AS ((VALUES(?::"text", ? :: "int4", ?::"int4", ?::"int4",?::"int4",?::"int4"))),
-- AS ((VALUES('gargantua'::"text", 5 :: "int4", 4::"int4", 4::"int4",1018::"int4",1019::"int4"))),
list_master AS (
SELECT n.id,n.name,n.user_id from nodes n
JOIN input ON n.typename = input.typenameList
JOIN auth_user a ON a.id = n.user_id
WHERE
a.username = input.masterUsername
),
ngrams_master AS (
SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight FROM nodes_ngrams nng
JOIN list_master ON list_master.id = nng.node_id
JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id
JOIN nodes n ON n.id = nng2.node_id
JOIN input ON input.typenameDoc = n.typename
JOIN ngrams ng ON ng.id = nng2.ngrams_id
WHERE
nng.ngrams_type = input.ngramsTypeId
-- AND n.hyperdata -> 'lang' = 'en'
GROUP BY ng.id,ng.terms
),
ngrams_user AS (
SELECT ng.id, ng.terms, SUM(nng2.weight) AS weight
FROM nodes_ngrams nng
JOIN list_master ON list_master.id = nng.node_id
JOIN nodes_ngrams nng2 ON nng2.ngrams_id = nng.ngrams_id
JOIN nodes_nodes nn ON nn.node2_id = nng2.node_id
JOIN ngrams ng ON ng.id = nng2.ngrams_id
JOIN input ON nn.node1_id = input.corpusId
WHERE
nng.ngrams_type = input.ngramsTypeId
-- AND n.hyperdata -> 'lang' = 'en'
GROUP BY ng.id,ng.terms
)
SELECT nu.id,nu.terms,SUM(nm.weight) wm,SUM(nu.weight) wu
FROM ngrams_user nu
JOIN ngrams_master nm ON nm.id = nu.id
WHERE
nm.weight > 1
AND
nu.weight > 1
GROUP BY nu.id,nu.terms
--ORDER BY wm DESC
--LIMIT 1000
|]
src/Gargantext/Text/Flow.hs
View file @
3b8711b9
...
...
@@ -23,8 +23,10 @@ import GHC.IO (FilePath)
import
qualified
Data.Text
as
T
import
Data.Text.IO
(
readFile
)
import
Data.Map.Strict
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
qualified
Data.Set
as
DS
import
Data.Text
(
Text
)
import
qualified
Data.Array.Accelerate
as
A
import
qualified
Data.Map.Strict
as
M
...
...
@@ -116,7 +118,12 @@ textFlow' termType contexts = do
let
myCooc2
=
M
.
filter
(
>
0
)
myCooc1
printDebug
"myCooc2 size"
(
M
.
size
myCooc2
)
printDebug
"myCooc2"
myCooc2
g
<-
cooc2graph
myCooc2
pure
g
-- TODO use Text only here instead of [Text]
cooc2graph
::
(
Map
([
Text
],
[
Text
])
Int
)
->
IO
Graph
cooc2graph
myCooc
=
do
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
let
myCooc3
=
filterCooc
(
FilterConfig
(
MapListSize
350
)
...
...
@@ -124,7 +131,7 @@ textFlow' termType contexts = do
(
SampleBins
10
)
(
Clusters
3
)
(
DefaultValue
0
)
)
myCooc
2
)
myCooc
printDebug
"myCooc3 size"
$
M
.
size
myCooc3
printDebug
"myCooc3"
myCooc3
...
...
@@ -146,7 +153,7 @@ textFlow' termType contexts = do
--let distanceMat = distributional matCooc
printDebug
"distanceMat shape"
$
A
.
arrayShape
distanceMat
printDebug
"distanceMat"
distanceMat
--
--let distanceMap = M.filter (>0) $ mat2map distanceMat
let
distanceMap
=
M
.
map
(
\
_
->
1
)
$
M
.
filter
(
>
0
)
$
mat2map
distanceMat
printDebug
"distanceMap size"
$
M
.
size
distanceMap
...
...
src/Gargantext/Viz/Graph.hs
View file @
3b8711b9
...
...
@@ -193,7 +193,7 @@ graphV3ToGraph (GraphV3 links nodes) = Graph (map nodeV32node nodes) (zipWith li
nodeV32node
::
NodeV3
->
Node
nodeV32node
(
NodeV3
no_id'
(
AttributesV3
cl'
)
no_s'
no_lb'
)
=
Node
no_s'
Terms
(
cs
$
show
no_id'
)
no_lb'
(
Attributes
cl'
)
linkV32edge
::
Int
->
EdgeV3
->
Edge
linkV32edge
n
(
EdgeV3
eo_s'
eo_t'
eo_w'
)
=
Edge
(
cs
$
show
eo_s'
)
(
cs
$
show
eo_t'
)
((
T
.
read
$
T
.
unpack
eo_w'
)
::
Double
)
(
cs
$
show
n
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment