Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2a514b43
Commit
2a514b43
authored
Dec 17, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed some more dead code
parent
88b8e657
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
2 additions
and
324 deletions
+2
-324
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+1
-162
Search.hs
src/Gargantext/Database/Action/Search.hs
+0
-16
Config.hs
src/Gargantext/Database/Admin/Config.hs
+0
-3
NodesContexts.hs
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+1
-101
Contact.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
+0
-14
CorpusField.hs
src/Gargantext/Database/Admin/Types/Hyperdata/CorpusField.hs
+0
-2
Frame.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
+0
-6
GargDB.hs
src/Gargantext/Database/GargDB.hs
+0
-20
No files found.
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
2a514b43
...
...
@@ -31,7 +31,7 @@ import Gargantext.Core ( HasDBid(toDBid) )
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
(
unionsWith
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
NodeType
(
NodeDocument
)
)
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
()
-- toDBid instance
import
Gargantext.Prelude
...
...
@@ -182,25 +182,6 @@ getOccByNgramsOnlyFast cId lId nt = do
FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
|]
-- query = [sql|
-- WITH node_context_ids AS
-- (select context_id, ngrams_id
-- FROM context_node_ngrams_view
-- WHERE node_id = ?
-- ), ns AS
-- (select ngrams_id FROM node_stories
-- WHERE node_id = ? AND ngrams_type_id = ?
-- )
-- SELECT ng.terms,
-- ARRAY ( SELECT DISTINCT context_id
-- FROM node_context_ids
-- WHERE ns.ngrams_id = node_context_ids.ngrams_id
-- )
-- AS context_ids
-- FROM ngrams ng
-- JOIN ns ON ng.id = ns.ngrams_id
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
...
...
@@ -245,26 +226,6 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
|]
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
-- JOIN nodes_contexts nc ON c.id = nc.context_id
-- WHERE c.typename = ?
-- AND nc.node_id = ?),
-- input_rows(terms) AS (?)
-- SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-- JOIN ngrams ng ON cng.ngrams_id = ng.id
-- JOIN input_rows ir ON ir.terms = ng.terms
-- JOIN nodes_contexts nc ON nc.context_id = cng.context_id
-- JOIN nodes_sample ns ON nc.context_id = ns.id
-- WHERE nc.node_id = ? -- CorpusId
-- AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nc.category > 0
-- -- AND nc.context_id IN (SELECT id FROM nodes_sample)
-- GROUP BY cng.node_id, ng.terms
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample'
::
HasDBid
NodeType
=>
CorpusId
->
Int
...
...
@@ -362,125 +323,3 @@ queryNgramsOnlyByContextUser = [sql|
AND nc.category > 0
GROUP BY ng.terms, cng.context_id
|]
-- | For one document, map each requested ngrams term to the set of node ids
-- returned for it by 'selectNgramsOnlyByDocUser'.
--
-- The terms are queried in batches of 1000 (see 'splitEvery'). The rows of
-- each batch are grouped by term, then the per-batch maps are merged; node
-- sets of a term that shows up in several batches are unioned.
getNgramsByDocOnlyUser
  :: DocId
  -> [ListId]
  -> NgramsType
  -> [NgramsTerm]
  -> DBCmd err (HashMap NgramsTerm (Set NodeId))
getNgramsByDocOnlyUser docId listIds ngramsType terms =
  mergeBatches
    <$> mapM (selectNgramsOnlyByDocUser docId listIds ngramsType)
             (splitEvery 1000 terms)
  where
    -- Union the per-batch maps, combining node sets on key collision.
    mergeBatches batches = HM.unionsWith (<>) (map groupByTerm batches)

    -- Turn the flat (term, node) rows of one batch into term -> {node}.
    groupByTerm rows =
      HM.fromListWith (<>) [ (term, Set.singleton node) | (term, node) <- rows ]
-- | Run 'queryNgramsOnlyByDocUser' for one batch of terms and wrap the
-- returned term texts back into 'NgramsTerm'.
--
-- The query parameters are, in placeholder order:
--
--   1. the terms, sent as a @VALUES@ relation with a single @text@ column;
--   2. the list ids, sent as a @VALUES@ relation with a single @int4@ column;
--   3. the document id;
--   4. the database id of the ngrams type.
selectNgramsOnlyByDocUser
  :: DocId
  -> [ListId]
  -> NgramsType
  -> [NgramsTerm]
  -> DBCmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms =
  map (first NgramsTerm) <$> runPGSQuery queryNgramsOnlyByDocUser params
  where
    params =
      ( Values [QualifiedIdentifier Nothing "text"] (map (DPS.Only . unNgramsTerm) tms)
      , Values [QualifiedIdentifier Nothing "int4"] (map (DPS.Only . DPS.toField) ls)
      , dId
      , toDBid nt
      )
-- | SQL behind 'selectNgramsOnlyByDocUser'.
--
-- Placeholders, in order: the @input_rows@ relation (terms), the
-- @input_list@ relation (ids), the value compared with @cng.node_id@ and the
-- ngrams type id. Returns distinct @(terms, node_id)@ pairs.
--
-- NOTE(review): the ids in @input_list@ are matched against
-- @cng.context_id@ while the document id is matched against @cng.node_id@;
-- confirm this is the intended role of the two columns.
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
  WITH input_rows(terms) AS (?),
  input_list(id) AS (?)
  SELECT ng.terms, cng.node_id FROM context_node_ngrams cng
  JOIN ngrams ng ON cng.ngrams_id = ng.id
  JOIN input_rows ir ON ir.terms = ng.terms
  JOIN input_list il ON il.id = cng.context_id
  WHERE cng.node_id = ? -- DocId
  AND cng.ngrams_type = ? -- NgramsTypeId
  GROUP BY ng.terms, cng.node_id
  |]
------------------------------------------------------------------------
-- | Map each term to the set of node ids returned for it by
-- 'selectNgramsByContextMaster'.
--
-- The query is run once per offset in @[0, 500 .. 10000]@, each time with
-- 1000 as its first argument; the rows of every run are grouped by term and
-- the resulting maps are merged, unioning the node sets.
--
-- TODO filter by language, database, any social field
getContextsByNgramsMaster
  :: HasDBid NodeType
  => UserCorpusId
  -> MasterCorpusId
  -> DBCmd err (HashMap Text (Set NodeId))
getContextsByNgramsMaster ucId mcId =
  mergePages
    <$> mapM (selectNgramsByContextMaster 1000 ucId mcId) [0, 500 .. 10000]
  where
    -- Union the per-run maps, combining node sets on key collision.
    mergePages pages = unionsWith (<>) (map nodesByTerm pages)

    -- Rows come back as (node, term); index them by term.
    nodesByTerm rows =
      HM.fromListWith (<>) [ (term, Set.singleton node) | (node, term) <- rows ]
-- | Run 'queryNgramsByContextMaster'' and return its @(node id, term)@ rows.
--
-- Arguments: @n@ feeds the @TABLESAMPLE SYSTEM_ROWS(?)@ placeholder, @ucId@
-- and @mcId@ are the user and master corpus ids, and @p@ feeds both
-- @node_pos@ comparisons.
--
-- NOTE(review): the lower (@>=@) and upper (@<@) @node_pos@ bounds both
-- receive @p@, which looks like it can never select a row — confirm the
-- intended upper bound.
selectNgramsByContextMaster
  :: HasDBid NodeType
  => Int
  -> UserCorpusId
  -> MasterCorpusId
  -> Int
  -> DBCmd err [(NodeId, Text)]
selectNgramsByContextMaster n ucId mcId p =
  runPGSQuery queryNgramsByContextMaster' params
  where
    docType   = toDBid NodeDocument
    termsType = toDBid NgramsTerms

    -- One entry per live "?" of the query, in order of appearance.
    params =
      ( ucId       -- nn.node_id
      , termsType  -- cng.ngrams_type (user part)
      , docType    -- node_pos(n.id, ?), lower bound
      , p          -- >= ?
      , docType    -- node_pos(n.id, ?), upper bound
      , p          -- <  ?
      , n          -- TABLESAMPLE SYSTEM_ROWS(?)
      , mcId       -- n.parent_id
      , docType    -- n.typename
      , termsType  -- cng.ngrams_type (master part)
      )
-- | SQL behind 'selectNgramsByContextMaster'.
--
-- Two CTEs are built and then joined:
--
--   * @contextsByNgramsUser@: @(context id, term)@ pairs for contexts linked
--     to the user corpus through @nodes_contexts@ with @category > 0@,
--     restricted by ngrams type and by the two @node_pos@ bounds;
--   * @contextsByNgramsMaster@: @(context id, term)@ pairs for a
--     @TABLESAMPLE SYSTEM_ROWS@ sample of @contexts@ whose parent is the
--     master corpus, restricted by type name and ngrams type.
--
-- The final @SELECT@ right-joins the master pairs onto the user pairs by
-- context id. It previously read from @nodesByNgramsMaster@, a name that no
-- CTE in this query defines (left over from before the CTEs were renamed
-- from @nodes…@ to @contexts…@); it now reads from @contextsByNgramsMaster@.
--
-- The ten live placeholders are filled, in order, by the tuple built in
-- 'selectNgramsByContextMaster'. The @?@ inside the commented-out
-- @n.typename@ line is not a live placeholder.
--
-- NOTE(review): because of the @RIGHT JOIN@, user rows without a master
-- match yield NULL for @m.id@ / @m.terms@ — confirm the row decoder copes.
--
-- TODO fix context_node_ngrams relation
queryNgramsByContextMaster' :: DPS.Query
queryNgramsByContextMaster' = [sql|
  WITH contextsByNgramsUser AS (
  SELECT n.id, ng.terms FROM contexts n
  JOIN nodes_contexts nn ON n.id = nn.context_id
  JOIN context_node_ngrams cng ON cng.context_id = n.id
  JOIN ngrams ng ON cng.ngrams_id = ng.id
  WHERE nn.node_id = ? -- UserCorpusId
  -- AND n.typename = ? -- toDBid
  AND cng.ngrams_type = ? -- NgramsTypeId
  AND nn.category > 0
  AND node_pos(n.id,?) >= ?
  AND node_pos(n.id,?) < ?
  GROUP BY n.id, ng.terms
  ),
  contextsByNgramsMaster AS (
  SELECT n.id, ng.terms FROM contexts n TABLESAMPLE SYSTEM_ROWS(?)
  JOIN context_node_ngrams cng ON n.id = cng.context_id
  JOIN ngrams ng ON ng.id = cng.ngrams_id
  WHERE n.parent_id = ? -- Master Corpus toDBid
  AND n.typename = ? -- toDBid
  AND cng.ngrams_type = ? -- NgramsTypeId
  GROUP BY n.id, ng.terms
  )
  SELECT m.id, m.terms FROM contextsByNgramsMaster m
  RIGHT JOIN contextsByNgramsUser u ON u.id = m.id
  |]
-- | Refreshes the \"context_node_ngrams_view\" materialized view.
-- This function will be run :
-- - periodically
-- - at reindex stage
-- - at the end of each text flow
-- refreshNgramsMaterialized :: Cmd err ()
-- refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
-- where
-- refreshNgramsMaterializedQuery :: DPS.Query
-- refreshNgramsMaterializedQuery =
-- [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
src/Gargantext/Database/Action/Search.hs
View file @
2a514b43
...
...
@@ -17,7 +17,6 @@ module Gargantext.Database.Action.Search (
searchInCorpus
,
searchInCorpusWithContacts
,
searchCountInCorpus
,
searchInCorpusWithNgrams
,
searchDocInDatabase
)
where
...
...
@@ -34,7 +33,6 @@ import Data.Time (UTCTime)
import
Gargantext.Core
(
Lang
(
EN
),
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
API
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
IsTrash
,
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
(
..
)
)
...
...
@@ -154,20 +152,6 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
returnA
-<
(
_ns_id
row
,
_ns_hyperdata
row
)
------------------------------------------------------------------------
-- | Intended to search ngrams in documents, ranking them by TF-IDF, with the
-- search narrowed to map/candidate terms.
--
-- This is currently an unimplemented stub: every argument is ignored and
-- the result is 'undefined', so evaluating a call fails at runtime.
searchInCorpusWithNgrams
  :: HasDBid NodeType
  => CorpusId       -- ^ corpus (unused)
  -> ListId         -- ^ list (unused)
  -> IsTrash        -- ^ trash flag (unused)
  -> NgramsType     -- ^ ngrams type (unused)
  -> [[Text]]       -- ^ query (unused)
  -> Maybe Offset   -- ^ offset (unused)
  -> Maybe Limit    -- ^ limit (unused)
  -> Maybe OrderBy  -- ^ ordering (unused)
  -> DBCmd err [FacetDoc]
searchInCorpusWithNgrams _ _ _ _ _ _ _ _ = undefined
-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
-- case only the "TF" part makes sense and so we only compute the
-- ratio of "number of times our terms appear in given document" and
...
...
src/Gargantext/Database/Admin/Config.hs
View file @
2a514b43
...
...
@@ -34,9 +34,6 @@ corpusMasterName = "Main"
-- | The user name literal @"gargantua"@.
-- NOTE(review): presumably the account that owns the master corpus —
-- confirm against callers.
userMaster :: Text
userMaster = "gargantua"
-- | The user name literal @"user1"@.
-- NOTE(review): looks like a sample/test account name — confirm against
-- callers.
userArbitrary :: Text
userArbitrary = "user1"
instance
HasDBid
NodeType
where
toDBid
n
=
nodeTypes
Bimap
.!
n
-- nodeTypes is total, this cannot fail by construction
lookupDBid
i
=
Bimap
.
lookupR
i
nodeTypes
...
...
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
View file @
2a514b43
...
...
@@ -11,7 +11,7 @@ Triggers on NodesNodes table.
-}
{-# LANGUAGE QuasiQuotes
#-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Admin.Trigger.NodesContexts
where
...
...
@@ -141,103 +141,3 @@ triggerUpdateDel lId = execPGSQuery query (lId, toDBid NodeList)
WHEN (OLD.category >= 1 AND NEW.category <= 0)
EXECUTE PROCEDURE set_update_ngrams_count_del();
|]
-- | Install the @set_delete_count@ PL/pgSQL function and the
-- statement-level @trigger_delete_count@ trigger on @nodes_contexts@.
--
-- After a @DELETE@ on @nodes_contexts@, the function lowers
-- @context_node_ngrams.weight@ by the number of deleted rows counted per
-- @(context, list, ngrams, ngrams type)@ group.
--
-- The two placeholders receive the master list id (first member of the
-- @nnn.node_id in (?, lists.id)@ test) and the database id of 'NodeList'
-- (@lists.typename = ?@). The result is whatever 'execPGSQuery' returns.
--
-- NOTE(review): the function uses @CREATE OR REPLACE@ but the trigger uses
-- a plain @CREATE TRIGGER@ with no prior @DROP@, so a second run presumably
-- fails on the existing trigger — confirm how this is meant to be invoked.
triggerDeleteCount :: MasterListId -> DBCmd err Int64
triggerDeleteCount lId =
  execPGSQuery installDeleteCount (lId, toDBid NodeList)
  where
    installDeleteCount :: DPS.Query
    installDeleteCount = [sql|
      CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
      BEGIN
      UPDATE context_node_ngrams SET weight = weight - d.delete_count
      FROM ( SELECT lists.id as node_id
      , old1.context_id as context_id
      , nnn.ngrams_id as ngrams_id
      , nnn.ngrams_type as ngrams_type
      , count(*) as delete_count FROM OLD as old1
      INNER JOIN contexts doc ON doc.id = old1.context_id
      INNER JOIN nodes lists ON lists.parent_id = old1.node_id
      INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
      WHERE nnn.node_id in (?, lists.id)
      AND lists.typename = ?
      GROUP BY old1.context_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
      ) AS d
      WHERE context_node_ngrams.context_id = d.context_id
      AND context_node_ngrams.node_id = d.node_id
      AND context_node_ngrams.ngrams_id = d.ngrams_id
      AND context_node_ngrams.ngrams_type = d.ngrams_type
      ;
      RETURN NULL;
      END
      $$ LANGUAGE plpgsql;
      -- DROP trigger trigger_delete_count on nodes_nodes;
      CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_contexts
      REFERENCING OLD TABLE AS OLD
      FOR EACH STATEMENT
      EXECUTE PROCEDURE set_delete_count();
      |]
-- TODO add groups
{-
triggerCoocInsert :: MasterListId -> Cmd err Int64
triggerCoocInsert lid = execPGSQuery query ( lid
-- , nodeTypeId NodeCorpus
-- , nodeTypeId NodeDocument
-- , nodeTypeId NodeList
, toDBid CandidateTerm
, toDBid CandidateTerm
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION nodes_nodes_set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT new1.node1_id, nn1.id, nn2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nn1
ON nn1.node_id = ? -- COALESCE(?,?) --(masterList, userList)
INNER JOIN node_ngrams nn2
ON nn2.node_id = nn1.node_id
INNER JOIN node_node_ngrams2 nnn1
ON nnn1.node_id = new1.node2_id
INNER JOIN node_node_ngrams2 nnn2
ON nnn2.node_id = new1.node2_id
WHERE nnn1.nodengrams_id = nn1.id
AND nnn2.nodengrams_id = nn2.id
AND nn1.id < nn2.id
AND nn1.node_subtype >= ?
AND nn2.node_subtype >= ?
GROUP BY new1.node1_id, nn1.id, nn2.id
)
SELECT * from input where weight >= 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE nodes_nodes_set_cooc();
|]
-}
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
View file @
2a514b43
...
...
@@ -70,20 +70,6 @@ data ContactMetaData =
,
_cm_lastValidation
::
Maybe
Text
-- TODO UTCTIME
}
deriving
(
Eq
,
Show
,
Generic
)
-- | A 'ContactMetaData' value filled with fixed placeholder texts.
defaultContactMetaData :: ContactMetaData
defaultContactMetaData =
  ContactMetaData
    (Just "bdd")
    (Just "TODO UTCTime") -- _cm_lastValidation: placeholder text, not a real timestamp
-- | An empty 'HyperdataContact': every optional field is 'Nothing' and the
-- list of locations (@_hc_where@) is empty.
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact =
  HyperdataContact
    { _hc_bdd            = Nothing
    , _hc_who            = Nothing
    , _hc_where          = []
    , _hc_title          = Nothing
    , _hc_source         = Nothing
    , _hc_lastValidation = Nothing
    }
data
ContactWho
=
ContactWho
{
_cw_id
::
Maybe
Text
,
_cw_firstName
::
Maybe
Text
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/CorpusField.hs
View file @
2a514b43
...
...
@@ -57,8 +57,6 @@ data HyperdataField a =
,
_hf_name
::
!
Text
,
_hf_data
::
!
a
}
deriving
(
Generic
,
Show
)
-- | A 'HyperdataField' built from the 'Markdown' constructor, the name
-- @"name"@ and 'defaultCorpusField' as its data.
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField =
  HyperdataField
    Markdown
    "name"              -- _hf_name
    defaultCorpusField  -- _hf_data
------------------------------------------------------------------------
-- Instances
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
View file @
2a514b43
...
...
@@ -69,9 +69,3 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
_
<-
Wreq
.
headWith
Wreq
.
defaults
$
T
.
unpack
path
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
-- | Download @\<base\>/\<frame id\>.csv@ over HTTP and return the response
-- body as text (strict bytes passed through 'decodeUtf8').
--
-- NOTE(review): the function is named "TSV" but requests a @.csv@ path —
-- confirm which format the remote side actually serves.
getHyperdataFrameTSV :: HyperdataFrame -> IO Text
getHyperdataFrameTSV (HyperdataFrame { _hf_base, _hf_frame_id }) =
  decodeBody <$> Wreq.get (T.unpack url)
  where
    url = _hf_base <> "/" <> _hf_frame_id <> ".csv"

    decodeBody response =
      decodeUtf8 (toStrict (response ^. Wreq.responseBody))
src/Gargantext/Database/GargDB.hs
View file @
2a514b43
...
...
@@ -60,10 +60,6 @@ type GargFilePath = (FolderPath, FileName)
-- | Folder component of a 'GargFilePath'; a plain 'FilePath' alias.
type FolderPath = FilePath

-- | File-name component of a 'GargFilePath'; a plain 'FilePath' alias.
type FileName = FilePath
--------------------------------
-- | Derive a 'GargFilePath' from a value's JSON form: the value is converted
-- with 'toJSON', rendered with 'Prelude.show', hashed, and the hash is handed
-- to 'toPath'.
dataFilePath :: (ToJSON a) => a -> GargFilePath
dataFilePath value = toPath (hash (Prelude.show (toJSON value)))
randomFilePath
::
(
MonadReader
env
m
,
MonadBase
IO
m
...
...
@@ -154,22 +150,6 @@ rmFile :: ( MonadReader env m
=>
FilePath
->
m
()
rmFile
=
onDisk_1
SD
.
removeFile
-- | Copy a file: 'SD.copyFile' run through 'onDisk_2' on the two given
-- paths.
cpFile
  :: (MonadReader env m, MonadBase IO m, HasConfig env)
  => FilePath
  -> FilePath
  -> m ()
cpFile source destination = onDisk_2 SD.copyFile source destination
---
-- | Move a file by copying it to the second path with 'cpFile' and then
-- removing the first path with 'rmFile'.
--
-- The two steps run one after the other, so the removal only happens once
-- the copy has completed.
mvFile
  :: (MonadReader env m, MonadBase IO m, HasConfig env)
  => FilePath
  -> FilePath
  -> m ()
mvFile fp1 fp2 = cpFile fp1 fp2 >> rmFile fp1
------------------------------------------------------------------------
onDisk_1
::
(
MonadReader
env
m
,
MonadBase
IO
m
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment