Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
dec1cb78
Commit
dec1cb78
authored
Feb 27, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FLOW/TEXT/DB] TFICF Done. Finally, need to connect all the components of the Flow now.
parent
42cba88f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
31 additions
and
13 deletions
+31
-13
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+25
-12
NodeNgram.hs
src/Gargantext/Database/Schema/NodeNgram.hs
+5
-1
TFICF.hs
src/Gargantext/Text/Metrics/TFICF.hs
+1
-0
No files found.
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
dec1cb78
...
...
@@ -24,22 +24,41 @@ import Data.Set (Set)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
import
Gargantext.Text.Terms.Mono.Stem
(
stem
)
import
qualified
Data.List
as
List
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Database.PostgreSQL.Simple
as
DPS
-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Text
->
Text
ngramsGroup
l
n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
take
n
.
List
.
sort
.
Text
.
splitOn
" "
.
Text
.
replace
"-"
" "
sortTficf
::
(
Map
Text
(
Double
,
Set
Text
))
->
[(
Double
,
Set
Text
)]
sortTficf
=
List
.
reverse
.
List
.
sortOn
fst
.
elems
getTficf
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
u
m
f
=
do
getTficf
'
u
m
f
=
do
u'
<-
getNodesByNgramsUser
u
m'
<-
getNodesByNgramsMaster
u
m
...
...
@@ -54,8 +73,8 @@ type Infra = Context
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
ti
n
)
(
TficfSupra
ts
$
maybe
0
fst
$
Map
.
lookup
t
m
s
)
fromList
[
(
t
,
(
tficf
(
TficfInfra
n
ti
)
(
TficfSupra
(
maybe
0
fst
$
Map
.
lookup
t
ms
)
t
s
)
,
ns
)
)
...
...
@@ -63,7 +82,6 @@ toTficfData (ti, mi) (ts, ms) =
]
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith
::
(
Text
->
Text
)
...
...
@@ -72,7 +90,8 @@ countNodesByNgramsWith :: (Text -> Text)
countNodesByNgramsWith
f
m
=
(
total
,
m'
)
where
total
=
fromIntegral
$
Set
.
size
$
Set
.
unions
$
elems
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
m'
=
Map
.
map
(
swap
.
second
(
fromIntegral
.
Set
.
size
))
$
groupNodesByNgramsWith
f
m
groupNodesByNgramsWith
::
(
Text
->
Text
)
...
...
@@ -157,10 +176,4 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
src/Gargantext/Database/Schema/NodeNgram.hs
View file @
dec1cb78
...
...
@@ -173,13 +173,14 @@ data Action = Del | Add
type
NgramsParent
=
Text
type
NgramsChild
=
Text
{-
ngramsGroup :: Action -> ListId -> [(NgramsTypeId, NgramsParent, NgramsChild)] -> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup a lid input = void $ trace (show input) $ execPGSQuery (ngramsGroupQuery a) (DPS.Only $ Values fields input')
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","text","int4","text","text"]
input' = map (\(ntpid,p,c) -> (lid, nodeTypeId NodeList, userMaster, ntpid, p,c)) input
-}
ngramsGroupQuery
::
Action
->
DPS
.
Query
ngramsGroupQuery
a
=
case
a
of
...
...
@@ -290,6 +291,8 @@ data NodeNgramsUpdate = NodeNgramsUpdate
-- TODO wrap these updates in a transaction.
-- TODO-ACCESS:
-- * check userId CanUpdateNgrams userListId
{-
updateNodeNgrams :: NodeNgramsUpdate -> Cmd err ()
updateNodeNgrams nnu = do
updateNodeNgrams' userListId $ _nnu_lists_update nnu
...
...
@@ -299,3 +302,4 @@ updateNodeNgrams nnu = do
ngramsGroup Add userListId $ _nnu_add_children nnu
where
userListId = _nnu_user_list_id nnu
-}
src/Gargantext/Text/Metrics/TFICF.hs
View file @
dec1cb78
...
...
@@ -48,6 +48,7 @@ type InfraContext = TficfContext
tficf
::
InfraContext
Double
Double
->
SupraContext
Double
Double
->
Double
tficf
(
TficfCorpus
c
c'
)
(
TficfLanguage
l
l'
)
=
tficf'
c
c'
l
l'
tficf
(
TficfDocument
d
d'
)(
TficfCorpus
c
c'
)
=
tficf'
d
d'
c
c'
tficf
(
TficfInfra
d
d'
)(
TficfSupra
c
c'
)
=
tficf'
d
d'
c
c'
tficf
_
_
=
panic
"Not in definition"
tficf'
::
Double
->
Double
->
Double
->
Double
->
Double
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment