Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
26fe8014
Commit
26fe8014
authored
Jan 18, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT][Text][Metrics] TFICF.
parent
08d3adb7
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
63 additions
and
63 deletions
+63
-63
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+5
-5
Individu.hs
src/Gargantext/Core/Types/Individu.hs
+3
-0
Flow.hs
src/Gargantext/Database/Flow.hs
+12
-1
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+17
-36
Node.hs
src/Gargantext/Database/Schema/Node.hs
+1
-1
TFICF.hs
src/Gargantext/Text/Metrics/TFICF.hs
+25
-20
No files found.
src/Gargantext/API/Ngrams.hs
View file @
26fe8014
...
@@ -58,7 +58,7 @@ import GHC.Generics (Generic)
...
@@ -58,7 +58,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Schema.Node
(
defaultList
,
HasNodeError
)
import
Gargantext.Database.Schema.Node
(
defaultList
,
HasNodeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
NgramsTypeId
,
ngramsTypeId
,
NgramsTableData
'
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
NgramsTypeId
,
ngramsTypeId
,
NgramsTableData
(
..
))
import
qualified
Gargantext.Database.Schema.Ngrams
as
Ngrams
import
qualified
Gargantext.Database.Schema.Ngrams
as
Ngrams
import
Gargantext.Database.Schema.NodeNgram
import
Gargantext.Database.Schema.NodeNgram
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Database.Utils
(
Cmd
)
...
@@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
...
@@ -120,10 +120,10 @@ newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
deriving
(
Ord
,
Eq
,
Generic
,
ToJSON
,
FromJSON
,
Show
)
deriving
(
Ord
,
Eq
,
Generic
,
ToJSON
,
FromJSON
,
Show
)
-- | TODO Check N and Weight
-- | TODO Check N and Weight
toNgramsElement
::
[
NgramsTableData
'
]
->
[
NgramsElement
]
toNgramsElement
::
[
NgramsTableData
]
->
[
NgramsElement
]
toNgramsElement
ns
=
map
toNgramsElement'
ns
toNgramsElement
ns
=
map
toNgramsElement'
ns
where
where
toNgramsElement'
(
NgramsTableData
'
_
p
t
_
lt
w
)
=
NgramsElement
t
lt'
(
round
w
)
p'
c'
toNgramsElement'
(
NgramsTableData
_
p
t
_
lt
w
)
=
NgramsElement
t
lt'
(
round
w
)
p'
c'
where
where
p'
=
case
p
of
p'
=
case
p
of
Nothing
->
Nothing
Nothing
->
Nothing
...
@@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns
...
@@ -132,14 +132,14 @@ toNgramsElement ns = map toNgramsElement' ns
lt'
=
maybe
(
panic
"API.Ngrams: listypeId"
)
identity
lt
lt'
=
maybe
(
panic
"API.Ngrams: listypeId"
)
identity
lt
mapParent
::
Map
Int
Text
mapParent
::
Map
Int
Text
mapParent
=
fromListWith
(
<>
)
$
map
(
\
(
NgramsTableData
'
i
_
t
_
_
_
)
->
(
i
,
t
))
ns
mapParent
=
fromListWith
(
<>
)
$
map
(
\
(
NgramsTableData
i
_
t
_
_
_
)
->
(
i
,
t
))
ns
mapChildren
::
Map
Text
(
Set
Text
)
mapChildren
::
Map
Text
(
Set
Text
)
mapChildren
=
mapKeys
(
\
i
->
(
maybe
(
panic
"API.Ngrams.mapChildren: ParentId with no Terms: Impossible"
)
identity
$
lookup
i
mapParent
))
mapChildren
=
mapKeys
(
\
i
->
(
maybe
(
panic
"API.Ngrams.mapChildren: ParentId with no Terms: Impossible"
)
identity
$
lookup
i
mapParent
))
$
fromListWith
(
<>
)
$
fromListWith
(
<>
)
$
map
(
first
fromJust
)
$
map
(
first
fromJust
)
$
filter
(
isJust
.
fst
)
$
filter
(
isJust
.
fst
)
$
map
(
\
(
NgramsTableData
'
_
p
t
_
_
_
)
->
(
p
,
Set
.
singleton
t
))
ns
$
map
(
\
(
NgramsTableData
_
p
t
_
_
_
)
->
(
p
,
Set
.
singleton
t
))
ns
instance
Arbitrary
NgramsTable
where
instance
Arbitrary
NgramsTable
where
...
...
src/Gargantext/Core/Types/Individu.hs
View file @
26fe8014
...
@@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu
...
@@ -18,3 +18,6 @@ module Gargantext.Core.Types.Individu
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
type
Username
=
Text
type
Username
=
Text
type
UsernameMaster
=
Username
type
UsernameSimple
=
Username
src/Gargantext/Database/Flow.hs
View file @
26fe8014
...
@@ -281,7 +281,10 @@ flowList uId cId ngs = do
...
@@ -281,7 +281,10 @@ flowList uId cId ngs = do
pure
lId
pure
lId
flowListUser
::
HasNodeError
err
=>
UserId
->
CorpusId
->
Cmd
err
NodeId
flowListUser
::
HasNodeError
err
=>
UserId
->
CorpusId
->
Cmd
err
NodeId
flowListUser
uId
cId
=
getOrMkList
cId
uId
flowListUser
uId
cId
=
do
lid
<-
getOrMkList
cId
uId
-- is <- insertLists lId $ ngrams2list ngs
pure
lid
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (
...
@@ -319,3 +322,11 @@ insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (
|
(
l
,(
ngt
,
ng
))
<-
lngs
|
(
l
,(
ngt
,
ng
))
<-
lngs
]
]
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Database/Schema/Ngrams.hs
View file @
26fe8014
...
@@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor)
...
@@ -53,6 +53,11 @@ import Prelude (Enum, Bounded, minBound, maxBound, Functor)
import
qualified
Data.Set
as
DS
import
qualified
Data.Set
as
DS
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Database.PostgreSQL.Simple
as
PGS
type
NgramsTerms
=
Text
type
NgramsId
=
Int
type
Size
=
Int
--{-
--{-
data
NgramsPoly
id
terms
n
=
NgramsDb
{
ngrams_id
::
id
data
NgramsPoly
id
terms
n
=
NgramsDb
{
ngrams_id
::
id
,
ngrams_terms
::
terms
,
ngrams_terms
::
terms
...
@@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4
...
@@ -128,10 +133,6 @@ ngramsTypeId NgramsTerms = 4
fromNgramsTypeId
::
NgramsTypeId
->
Maybe
NgramsType
fromNgramsTypeId
::
NgramsTypeId
->
Maybe
NgramsType
fromNgramsTypeId
id
=
lookup
id
$
fromList
[(
ngramsTypeId
nt
,
nt
)
|
nt
<-
[
minBound
..
maxBound
]
::
[
NgramsType
]]
fromNgramsTypeId
id
=
lookup
id
$
fromList
[(
ngramsTypeId
nt
,
nt
)
|
nt
<-
[
minBound
..
maxBound
]
::
[
NgramsType
]]
type
NgramsTerms
=
Text
type
NgramsId
=
Int
type
Size
=
Int
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams
-- | TODO put it in Gargantext.Text.Ngrams
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
data
Ngrams
=
Ngrams
{
_ngramsTerms
::
Text
...
@@ -232,7 +233,7 @@ queryInsertNgrams = [sql|
...
@@ -232,7 +233,7 @@ queryInsertNgrams = [sql|
getNgramsTableDb
::
NodeType
->
NgramsType
getNgramsTableDb
::
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamUser
->
Limit
->
Offset
->
Limit
->
Offset
->
Cmd
err
[
NgramsTableData
'
]
->
Cmd
err
[
NgramsTableData
]
getNgramsTableDb
nt
ngrt
ntp
limit_
offset_
=
do
getNgramsTableDb
nt
ngrt
ntp
limit_
offset_
=
do
...
@@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do
...
@@ -245,7 +246,7 @@ getNgramsTableDb nt ngrt ntp limit_ offset_ = do
listMasterId
<-
maybe
(
panic
"error master list"
)
(
view
node_id
)
<$>
head
<$>
getListsWithParentId
corpusMasterId
listMasterId
<-
maybe
(
panic
"error master list"
)
(
view
node_id
)
<$>
head
<$>
getListsWithParentId
corpusMasterId
getNgramsTableData
'
nt
ngrt
ntp
(
NgramsTableParam
listMasterId
corpusMasterId
)
limit_
offset_
getNgramsTableData
nt
ngrt
ntp
(
NgramsTableParam
listMasterId
corpusMasterId
)
limit_
offset_
data
NgramsTableParam
=
data
NgramsTableParam
=
NgramsTableParam
{
_nt_listId
::
NodeId
NgramsTableParam
{
_nt_listId
::
NodeId
...
@@ -255,44 +256,24 @@ data NgramsTableParam =
...
@@ -255,44 +256,24 @@ data NgramsTableParam =
type
NgramsTableParamUser
=
NgramsTableParam
type
NgramsTableParamUser
=
NgramsTableParam
type
NgramsTableParamMaster
=
NgramsTableParam
type
NgramsTableParamMaster
=
NgramsTableParam
data
NgramsTableData
=
NgramsTableData
{
_ntd_ngrams
::
Text
,
_ntd_n
::
Int
,
_ntd_listType
::
Maybe
ListType
,
_ntd_weight
::
Double
}
deriving
(
Show
)
getNgramsTableData
::
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
Limit
->
Offset
->
Cmd
err
[
NgramsTableData
]
getNgramsTableData
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
limit_
offset_
=
trace
(
"Ngrams table params"
<>
show
params
)
<$>
map
(
\
(
t
,
n
,
nt
,
w
)
->
NgramsTableData
t
n
(
fromListTypeId
nt
)
w
)
<$>
runPGSQuery
querySelectTableNgrams
params
where
nodeTId
=
nodeTypeId
nodeT
ngrmTId
=
ngramsTypeId
ngrmT
params
=
(
ul
,
uc
,
nodeTId
,
ngrmTId
,
ml
,
mc
,
nodeTId
,
ngrmTId
,
uc
)
:.
(
limit_
,
offset_
)
data
NgramsTableData
'
=
NgramsTableData'
{
_ntd2
_id
::
Int
data
NgramsTableData
=
NgramsTableData
{
_ntd
_id
::
Int
,
_ntd
2
_parent_id
::
Maybe
Int
,
_ntd_parent_id
::
Maybe
Int
,
_ntd
2
_terms
::
Text
,
_ntd_terms
::
Text
,
_ntd
2
_n
::
Int
,
_ntd_n
::
Int
,
_ntd
2
_listType
::
Maybe
ListType
,
_ntd_listType
::
Maybe
ListType
,
_ntd
2
_weight
::
Double
,
_ntd_weight
::
Double
}
deriving
(
Show
)
}
deriving
(
Show
)
getNgramsTableData
'
::
NodeType
->
NgramsType
getNgramsTableData
::
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
Limit
->
Offset
->
Limit
->
Offset
->
Cmd
err
[
NgramsTableData
'
]
->
Cmd
err
[
NgramsTableData
]
getNgramsTableData
'
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
limit_
offset_
=
getNgramsTableData
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
limit_
offset_
=
trace
(
"Ngrams table params: "
<>
show
params
)
<$>
trace
(
"Ngrams table params: "
<>
show
params
)
<$>
map
(
\
(
i
,
p
,
t
,
n
,
lt
,
w
)
->
NgramsTableData
'
i
p
t
n
(
fromListTypeId
lt
)
w
)
<$>
map
(
\
(
i
,
p
,
t
,
n
,
lt
,
w
)
->
NgramsTableData
i
p
t
n
(
fromListTypeId
lt
)
w
)
<$>
runPGSQuery
querySelectTableNgramsTrees
params
runPGSQuery
querySelectTableNgramsTrees
params
where
where
nodeTId
=
nodeTypeId
nodeT
nodeTId
=
nodeTypeId
nodeT
...
...
src/Gargantext/Database/Schema/Node.hs
View file @
26fe8014
...
@@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of
...
@@ -527,7 +527,7 @@ mkRoot uname uId = case uId > 0 of
mkCorpus
::
Maybe
Name
->
Maybe
HyperdataCorpus
->
ParentId
->
UserId
->
Cmd
err
[
CorpusId
]
mkCorpus
::
Maybe
Name
->
Maybe
HyperdataCorpus
->
ParentId
->
UserId
->
Cmd
err
[
CorpusId
]
mkCorpus
n
h
p
u
=
insertNodesR
[
nodeCorpusW
n
h
p
u
]
mkCorpus
n
h
p
u
=
insertNodesR
[
nodeCorpusW
n
h
p
u
]
getOrMkList
::
HasNodeError
err
=>
ParentId
->
UserId
->
Cmd
err
Node
Id
getOrMkList
::
HasNodeError
err
=>
ParentId
->
UserId
->
Cmd
err
List
Id
getOrMkList
pId
uId
=
getOrMkList
pId
uId
=
maybe
(
mkList'
pId
uId
)
(
pure
.
view
node_id
)
.
headMay
=<<
getListsWithParentId
pId
maybe
(
mkList'
pId
uId
)
(
pure
.
view
node_id
)
.
headMay
=<<
getListsWithParentId
pId
where
where
...
...
src/Gargantext/Text/Metrics/TFICF.hs
View file @
26fe8014
...
@@ -7,38 +7,43 @@ Maintainer : team@gargantext.org
...
@@ -7,38 +7,43 @@ Maintainer : team@gargantext.org
Stability : experimental
Stability : experimental
Portability : POSIX
Portability : POSIX
Definition of TFICF
Definition of TFICF
: Term Frequency - Inverse of Context Frequency
-}
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Metrics.TFICF
where
module
Gargantext.Text.Metrics.TFICF
where
import
GHC.Generics
(
Generic
)
--import Data.Text (Text)
import
Gargantext.Prelude
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTerms
)
import
Data.Maybe
(
Maybe
)
data
TficfContext
n
m
=
TficfLanguage
n
m
|
TficfCorpus
n
m
|
TficfDocument
n
m
import
Data.Text
(
Text
)
deriving
(
Show
)
import
Text.Show
(
Show
())
-- import Gargantext.Types
data
Tficf
=
Tficf
{
tficf_ngramsId
::
NgramsId
import
Gargantext.Prelude
,
tficf_ngramsTerms
::
NgramsTerms
,
tficf_score
::
Double
}
data
Context
=
Corpus
|
Documen
t
type
SupraContext
=
TficfContex
t
deriving
(
Show
,
Generic
)
type
InfraContext
=
TficfContext
data
TFICF
=
TFICF
{
_tficfTerms
::
Text
-- | TFICF is a generalization of TFIDF
,
_tficfContext1
::
Context
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
,
_tficfContext2
::
Context
tficf
::
InfraContext
Double
Double
->
SupraContext
Double
Double
->
Double
,
_tficfScore
::
Maybe
Double
tficf
(
TficfCorpus
c
c'
)
(
TficfLanguage
l
l'
)
=
tficf'
c
c'
l
l'
}
deriving
(
Show
,
Generic
)
tficf
(
TficfDocument
d
d'
)(
TficfCorpus
c
c'
)
=
tficf'
d
d'
c
c'
tficf
_
_
=
panic
"Not in definition"
tficf'
::
Double
->
Double
->
Double
->
Double
->
Double
tficf'
c
c'
l
l'
|
c
<=
c'
&&
l
<
l'
=
(
c
/
c'
)
/
log
(
l
/
l'
)
|
otherwise
=
panic
"Frequency impossible"
--tfidf :: Text -> TFICF
--tfidf txt = TFICF txt Document Corpus score
-- where
-- score = Nothing
tficf_example
::
[(
Double
,
Double
,
Double
,
Double
)]
tficf_example
=
undefined
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment