Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
dec1cb78
Commit
dec1cb78
authored
Feb 27, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW/TEXT/DB] TFICF Done. Finally, need to connect all the components of the Flow now.
parent
42cba88f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
31 additions
and
13 deletions
+31
-13
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+25
-12
NodeNgram.hs
src/Gargantext/Database/Schema/NodeNgram.hs
+5
-1
TFICF.hs
src/Gargantext/Text/Metrics/TFICF.hs
+1
-0
No files found.
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
dec1cb78
...
@@ -24,22 +24,41 @@ import Data.Set (Set)
...
@@ -24,22 +24,41 @@ import Data.Set (Set)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
import
Gargantext.Text.Terms.Mono.Stem
(
stem
)
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
qualified
Database.PostgreSQL.Simple
as
DPS
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Text
->
Text
ngramsGroup
l
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
take
n
.
List
.
sort
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
->
[(
Double
,
Set
Text
)]
sortTficf
=
List
.
reverse
.
List
.
sortOn
fst
.
elems
getTficf
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
u
m
f
=
do
getTficf
'
u
m
f
=
do
u'
<-
getNodesByNgramsUser
u
u'
<-
getNodesByNgramsUser
u
m'
<-
getNodesByNgramsMaster
u
m
m'
<-
getNodesByNgramsMaster
u
m
...
@@ -54,8 +73,8 @@ type Infra = Context
...
@@ -54,8 +73,8 @@ type Infra = Context
toTficfData
::
Infra
->
Supra
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
ti
n
)
fromList
[
(
t
,
(
tficf
(
TficfInfra
n
ti
)
(
TficfSupra
ts
$
maybe
0
fst
$
Map
.
lookup
t
m
s
)
(
TficfSupra
(
maybe
0
fst
$
Map
.
lookup
t
ms
)
t
s
)
,
ns
,
ns
)
)
)
)
...
@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) =
...
@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) =
]
]
-- | fst is size of Supra Corpus
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
-- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith
::
(
Text
->
Text
)
countNodesByNgramsWith
::
(
Text
->
Text
)
...
@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text)
...
@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text)
countNodesByNgramsWith
f
m
=
(
total
,
m'
)
countNodesByNgramsWith
f
m
=
(
total
,
m'
)
where
where
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
elems
m
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
elems
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
groupNodesByNgramsWith
::
(
Text
->
Text
)
groupNodesByNgramsWith
::
(
Text
->
Text
)
...
@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
...
@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
|]
src/Gargantext/Database/Schema/NodeNgram.hs
View file @
dec1cb78
...
@@ -173,13 +173,14 @@ data Action = Del | Add
...
@@ -173,13 +173,14 @@ data Action = Del | Add
type
NgramsParent
=
Text
type
NgramsParent
=
Text
type
NgramsChild
=
Text
type
NgramsChild
=
Text
{-
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup _ _ [] = pure ()
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
where
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
-}
ngramsGroupQuery
::
Action
->
DPS
.
Query
ngramsGroupQuery
::
Action
->
DPS
.
Query
ngramsGroupQuery
a
=
case
a
of
ngramsGroupQuery
a
=
case
a
of
...
@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate
...
@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate
-- TODO wrap these updates in a transaction.
-- TODO wrap these updates in a transaction.
-- TODO-ACCESS:
-- TODO-ACCESS:
-- * check userId CanUpdateNgrams userListId
-- * check userId CanUpdateNgrams userListId
{-
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams nnu = do
updateNodeNgrams nnu = do
updateNodeNgrams' userListId $ _nnu_lists_update nnu
updateNodeNgrams' userListId $ _nnu_lists_update nnu
...
@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do
...
@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do
ngramsGroup Add userListId $ _nnu_add_children nnu
ngramsGroup Add userListId $ _nnu_add_children nnu
where
where
userListId = _nnu_user_list_id nnu
userListId = _nnu_user_list_id nnu
-}
src/Gargantext/Text/Metrics/TFICF.hs
View file @
dec1cb78
...
@@ -48,6 +48,7 @@ type InfraContext = TficfContext
...
@@ -48,6 +48,7 @@ type InfraContext = TficfContext
tficf
::
InfraContext
Double
Double
->
SupraContext
Double
Double
->
Double
tficf
::
InfraContext
Double
Double
->
SupraContext
Double
Double
->
Double
tficf
(
TficfCorpus
c
c'
)
(
TficfLanguage
l
l'
)
=
tficf'
c
c'
l
l'
tficf
(
TficfCorpus
c
c'
)
(
TficfLanguage
l
l'
)
=
tficf'
c
c'
l
l'
tficf
(
TficfDocument
d
d'
)(
TficfCorpus
c
c'
)
=
tficf'
d
d'
c
c'
tficf
(
TficfDocument
d
d'
)(
TficfCorpus
c
c'
)
=
tficf'
d
d'
c
c'
tficf
(
TficfInfra
d
d'
)(
TficfSupra
c
c'
)
=
tficf'
d
d'
c
c'
tficf
_
_
=
panic
"Not in definition"
tficf
_
_
=
panic
"Not in definition"
tficf'
::
Double
->
Double
->
Double
->
Double
->
Double
tficf'
::
Double
->
Double
->
Double
->
Double
->
Double
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment