Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3e7c2638
Commit
3e7c2638
authored
Feb 25, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] bugs at import (ngrams must not exceed 255 chars).
parent
b34b8baf
Pipeline
#224
failed with stage
Changes
5
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
73 additions
and
23 deletions
+73
-23
.gitignore
.gitignore
+3
-0
TFICF.hs
src/Gargantext/Core/Metrics/TFICF.hs
+32
-0
Flow.hs
src/Gargantext/Database/Flow.hs
+28
-13
TFICF.hs
src/Gargantext/Database/Metrics/TFICF.hs
+8
-8
schema.sql
src/Gargantext/Database/Schema/schema.sql
+2
-2
No files found.
.gitignore
View file @
3e7c2638
...
...
@@ -24,3 +24,6 @@ _darcs
*.pdf
# Runtime
# Repo
repo.json*
src/Gargantext/Core/Metrics/TFICF.hs
0 → 100644
View file @
3e7c2638
{-|
Module : Gargantext.Core.Metrics.TFICF
Description : Core Metrics TFICF filtering and grouping
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Core.Metrics.TFICF
where
import
Data.Map
import
Gargantext.Prelude
import
Gargantext.Database.Metrics.TFICF
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Text.Metrics.TFICF
import
Gargantext.API.Ngrams
-- | Placeholder for the "grouping" step named in this module's description.
-- Takes TFICF data and a map from ngrams type to ngrams elements and is
-- meant to return TFICF data.
-- Not implemented yet: the binding is 'undefined', so evaluating it fails
-- at runtime.
group
::
TficfData
->
Map
NgramsType
[
NgramsElement
]
->
TficfData
group
=
undefined
-- | Placeholder for the "filtering" step named in this module's description.
-- Takes TFICF data and is meant to return a list of ngrams elements.
-- Not implemented yet: the binding is 'undefined', so evaluating it fails
-- at runtime.
-- NOTE(review): the name may clash with a 'filter' exported by the imported
-- modules when used unqualified; confirm against the import lists.
filter
::
TficfData
->
[
NgramsElement
]
filter
=
undefined
src/Gargantext/Database/Flow.hs
View file @
3e7c2638
...
...
@@ -29,6 +29,7 @@ import Data.Map (Map, lookup, fromListWith, toList)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Data.Monoid
import
Data.Text
(
Text
,
splitOn
,
intercalate
)
import
qualified
Data.Text
as
Text
import
Data.Tuple.Extra
(
both
)
import
Data.List
(
concat
)
import
GHC.Show
(
Show
)
...
...
@@ -38,7 +39,7 @@ import Gargantext.Core.Types.Main
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Config
(
userMaster
,
userArbitrary
,
corpusMasterName
)
import
Gargantext.Database.Flow.Utils
(
insertToNodeNgrams
)
import
Gargantext.Database.Metrics.TFICF
(
getTficf
)
--
import Gargantext.Database.Metrics.TFICF (getTficf)
import
Gargantext.Text.Terms
(
extractTerms
)
import
Gargantext.Text.Metrics.TFICF
(
Tficf
(
..
))
import
Gargantext.Database.Metrics.Count
(
getNgramsElementsWithParentNodeId
)
...
...
@@ -205,8 +206,8 @@ toInserted :: [ReturnId] -> Map HashId ReturnId
-- Keep only the rows flagged as inserted ('reInserted'), then index them
-- by their unique id ('reUniqId').
toInserted = DM.fromList . map keyed . filter reInserted
  where
    -- Pair a row with the key it is stored under.
    keyed row = (reUniqId row, row)
data
DocumentWithId
=
DocumentWithId
{
documentId
::
!
NodeId
data
DocumentWithId
=
DocumentWithId
{
documentId
::
!
NodeId
,
documentData
::
!
HyperdataDocument
}
deriving
(
Show
)
...
...
@@ -220,17 +221,23 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList
<*>
Just
hpd
------------------------------------------------------------------------
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
data
DocumentIdWithNgrams
=
DocumentIdWithNgrams
{
documentWithId
::
!
DocumentWithId
,
document_ngrams
::
!
(
Map
Ngrams
(
Map
NgramsType
Int
))
}
deriving
(
Show
)
-- TODO group terms
extractNgramsT
::
HasNodeError
err
=>
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
doc
=
do
extractNgramsT
hd
=
filterNgramsT
255
<$>
extractNgramsT'
hd
extractNgramsT'
::
HasNodeError
err
=>
HyperdataDocument
->
Cmd
err
(
Map
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
_hyperdataDocument_source
doc
...
...
@@ -257,7 +264,15 @@ extractNgramsT doc = do
<>
[(
a'
,
DM
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
DM
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
--{-
-- | Cap the length of every ngram term at @s@ characters.
--
-- Terms of at most @s@ characters are kept untouched; longer terms are cut
-- down to their first @s@ characters (the ngram size field is left as is).
-- The caller in this file passes 255.
--
-- Truncation can make several distinct ngrams collapse onto the same key
-- (for instance two long terms sharing the same prefix). Their per-type
-- counts are summed instead of one entry silently overwriting the other,
-- so no occurrence is lost.
filterNgramsT :: Int
              -> Map Ngrams (Map NgramsType Int)
              -> Map Ngrams (Map NgramsType Int)
filterNgramsT s = DM.mapKeysWith (DM.unionWith (+)) truncateTerms
  where
    -- Shorten the term only when it exceeds the limit.
    truncateTerms ng@(Ngrams t n)
      | Text.length t <= s = ng
      | otherwise          = Ngrams (Text.take s t) n
--}
documentIdWithNgrams
::
HasNodeError
err
=>
(
HyperdataDocument
...
...
@@ -310,7 +325,7 @@ flowListUser :: FlowCmdM env err m
->
Map
NgramsType
[
NgramsElement
]
->
Int
->
m
ListId
flowListUser
uId
cId
ngsM
n
=
do
flowListUser
uId
cId
ngsM
_
n
=
do
lId
<-
getOrMkList
cId
uId
{-
...
...
src/Gargantext/Database/Metrics/TFICF.hs
View file @
3e7c2638
{-|
Module : Gargantext.Database.Metrics.TFICF
Description :
Ngram connection to the
Database
Description :
Building TFICF Data from
Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TFICF, generalization of TFIDF
-}
...
...
@@ -22,13 +21,13 @@ import Data.Text (Text)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
Safe
(
headMay
)
import
Gargantext.Text.Metrics.TFICF
-- (tficf)
--
import Gargantext.Text.Metrics.TFICF -- (tficf)
import
Gargantext.Prelude
import
Gargantext.Core.Types.Individu
(
UsernameMaster
)
import
Gargantext.Database.Utils
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Types.Node
(
ListId
,
CorpusId
,
NodeType
(
..
))
import
Gargantext.Database.Types.Node
(
{-ListId,-}
CorpusId
,
NodeType
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsId
,
NgramsTerms
,
NgramsType
,
ngramsTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
{-NgramsId, NgramsTerms,-}
NgramsType
,
ngramsTypeId
)
type
OccGlobal
=
Double
type
OccCorpus
=
Double
...
...
@@ -45,6 +44,7 @@ data TficfData = TficfData
,
td_terms
::
!
[
TficfTerms
]
}
deriving
(
Show
)
getTficf
::
UsernameMaster
->
CorpusId
->
NgramsType
->
Cmd
err
TficfData
getTficf
u
cId
ngType
=
do
...
...
src/Gargantext/Database/Schema/schema.sql
View file @
3e7c2638
...
...
@@ -89,8 +89,8 @@ ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------
CREATE
TABLE
public
.
nodes_nodes
(
node1_id
integer
NOT
NULL
,
node2_id
integer
NOT
NULL
,
node1_id
integer
NOT
NULL
REFERENCES
public
.
nodes
(
id
)
ON
DELETE
CASCADE
,
node2_id
integer
NOT
NULL
REFERENCES
public
.
nodes
(
id
)
ON
DELETE
CASCADE
,
score
real
,
favorite
boolean
,
delete
boolean
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment