Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
165
Issues
165
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
545bb1a3
Commit
545bb1a3
authored
Feb 27, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW] new TFICF function (full Haskell).
parent
a364ea38
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
96 additions
and
40 deletions
+96
-40
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+78
-31
Node.hs
src/Gargantext/Database/Types/Node.hs
+3
-1
TFICF.hs
src/Gargantext/Text/Metrics/TFICF.hs
+15
-8
No files found.
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
545bb1a3
...
...
@@ -19,38 +19,65 @@ Ngrams by node enable special metrics.
module
Gargantext.Database.Metrics.NgramsByNode
where
import
Data.Map.Strict
(
Map
,
fromListWith
,
{-elems,-}
to
List
)
import
Data.Map.Strict
(
Map
,
fromListWith
,
elems
,
toList
,
from
List
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
second
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Database.PostgreSQL.Simple
as
DPS
type
GlobalNodeId
=
NodeId
type
LocalNodeId
=
NodeId
joinNodesByNgrams
::
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
GlobalNodeId
,
Set
LocalNodeId
)
joinNodesByNgrams
=
undefined
getTficf
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
u
m
f
=
do
u'
<-
getNodesByNgramsUser
u
m'
<-
getNodesByNgramsMaster
u
m
countNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
Text
,
Int
)
countNodesByNgramsWith
f
m
=
Map
.
map
(
second
Set
.
size
)
$
groupNodesByNgramsWith
f
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
groupNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
Text
,
Set
NodeId
)
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
ti
n
)
(
TficfSupra
ts
$
maybe
0
fst
$
Map
.
lookup
t
ms
)
,
ns
)
)
|
(
t
,
(
n
,
ns
))
<-
toList
mi
]
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
(
Double
,
Map
Text
(
Double
,
Set
Text
))
countNodesByNgramsWith
f
m
=
(
total
,
m'
)
where
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
elems
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
groupNodesByNgramsWith
::
(
Text
->
Text
)
->
Map
Text
(
Set
NodeId
)
->
Map
Text
(
Set
Text
,
Set
NodeId
)
groupNodesByNgramsWith
f
m
=
fromListWith
(
\
a
b
->
(
fst
a
<>
fst
b
,
snd
a
<>
snd
b
))
$
map
(
\
(
t
,
ns
)
->
(
f
t
,
(
Set
.
singleton
t
,
ns
)))
...
...
@@ -84,15 +111,18 @@ queryNgramsByNodeUser = [sql|
|]
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsMaster
ucId
mcId
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeMaster
ucId
mcId
getNodesByNgramsMaster
::
CorpusId
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsMaster
cId
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
))
<$>
selectNgramsByNodeMaster
cId
selectNgramsByNodeMaster
::
CorpusId
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
cId
=
runPGSQuery
selectNgramsByNodeMaster
::
UserCorpusId
->
MasterCorpusId
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
ucId
mcId
=
runPGSQuery
queryNgramsByNodeMaster
(
cId
(
ucId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
NgramsTerms
,
mcId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
NgramsTerms
)
...
...
@@ -100,16 +130,33 @@ selectNgramsByNodeMaster cId = runPGSQuery
queryNgramsByNodeMaster
::
DPS
.
Query
queryNgramsByNodeMaster
=
[
sql
|
SELECT nng.node_id, ng.terms FROM nodes_ngrams nng
JOIN ngrams ng ON ng.id = nng.ngrams_id
JOIN nodes n ON n.id = nng.node_id
WHERE n.parent_id = ? -- Master Corpus NodeTypeId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY nng.node_id, ng.terms
LIMIT 10000 -- TODO remove the hard limit and limit with corpus only
WITH nodesByNgramsUser AS (
SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- UserCorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
GROUP BY nng.node_id, ng.id, ng.terms
),
nodesByNgramsMaster AS (
SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
JOIN ngrams ng ON ng.id = nng.ngrams_id
JOIN nodes n ON n.id = nng.node_id
WHERE n.parent_id = ? -- Master Corpus NodeTypeId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY nng.node_id, ng.id, ng.terms)
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
...
...
src/Gargantext/Database/Types/Node.hs
View file @
545bb1a3
...
...
@@ -87,7 +87,9 @@ type ListId = NodeId
type
DocumentId
=
NodeId
type
DocId
=
DocumentId
-- todo: remove this
type
RootId
=
NodeId
type
MasterCorpusId
=
NodeId
type
MasterCorpusId
=
CorpusId
type
UserCorpusId
=
CorpusId
type
AnnuaireId
=
NodeId
type
ContactId
=
NodeId
...
...
src/Gargantext/Text/Metrics/TFICF.hs
View file @
545bb1a3
...
...
@@ -20,17 +20,24 @@ module Gargantext.Text.Metrics.TFICF where
import
Gargantext.Prelude
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTerms
)
data
TficfContext
n
m
=
TficfLanguage
n
m
|
TficfCorpus
n
m
|
TficfDocument
n
m
data
TficfContext
n
m
=
TficfLanguage
n
m
|
TficfCorpus
n
m
|
TficfDocument
n
m
|
TficfInfra
n
m
|
TficfSupra
n
m
deriving
(
Show
)
data
Tficf
=
Tficf
{
tficf_ngramsId
::
NgramsId
,
tficf_ngramsTerms
::
NgramsTerms
,
tficf_score
::
Double
}
deriving
(
Show
)
data
Tficf
=
Tficf
{
tficf_ngramsId
::
NgramsId
,
tficf_ngramsTerms
::
NgramsTerms
,
tficf_score
::
Double
}
deriving
(
Show
)
data
Tficf'
=
Tficf'
{
tficf'_terms
::
NgramsTerms
,
tficf'_score
::
Double
}
deriving
(
Show
)
data
Tficf'
=
Tficf'
{
tficf'_terms
::
NgramsTerms
,
tficf'_score
::
Double
}
deriving
(
Show
)
type
SupraContext
=
TficfContext
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment