Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
175
Issues
175
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2a514b43
Commit
2a514b43
authored
Dec 17, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed some more dead code
parent
88b8e657
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
2 additions
and
324 deletions
+2
-324
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+1
-162
Search.hs
src/Gargantext/Database/Action/Search.hs
+0
-16
Config.hs
src/Gargantext/Database/Admin/Config.hs
+0
-3
NodesContexts.hs
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+1
-101
Contact.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
+0
-14
CorpusField.hs
src/Gargantext/Database/Admin/Types/Hyperdata/CorpusField.hs
+0
-2
Frame.hs
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
+0
-6
GargDB.hs
src/Gargantext/Database/GargDB.hs
+0
-20
No files found.
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
2a514b43
...
...
@@ -31,7 +31,7 @@ import Gargantext.Core ( HasDBid(toDBid) )
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
(
unionsWith
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
(
..
),
NodeType
(
NodeDocument
)
)
import
Gargantext.Database.Prelude
(
DBCmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
()
-- toDBid instance
import
Gargantext.Prelude
...
...
@@ -182,25 +182,6 @@ getOccByNgramsOnlyFast cId lId nt = do
FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
|]
-- query = [sql|
-- WITH node_context_ids AS
-- (select context_id, ngrams_id
-- FROM context_node_ngrams_view
-- WHERE node_id = ?
-- ), ns AS
-- (select ngrams_id FROM node_stories
-- WHERE node_id = ? AND ngrams_type_id = ?
-- )
-- SELECT ng.terms,
-- ARRAY ( SELECT DISTINCT context_id
-- FROM node_context_ids
-- WHERE ns.ngrams_id = node_context_ids.ngrams_id
-- )
-- AS context_ids
-- FROM ngrams ng
-- JOIN ns ON ng.id = ns.ngrams_id
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
...
...
@@ -245,26 +226,6 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
|]
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
-- JOIN nodes_contexts nc ON c.id = nc.context_id
-- WHERE c.typename = ?
-- AND nc.node_id = ?),
-- input_rows(terms) AS (?)
-- SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-- JOIN ngrams ng ON cng.ngrams_id = ng.id
-- JOIN input_rows ir ON ir.terms = ng.terms
-- JOIN nodes_contexts nc ON nc.context_id = cng.context_id
-- JOIN nodes_sample ns ON nc.context_id = ns.id
-- WHERE nc.node_id = ? -- CorpusId
-- AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nc.category > 0
-- -- AND nc.context_id IN (SELECT id FROM nodes_sample)
-- GROUP BY cng.node_id, ng.terms
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample'
::
HasDBid
NodeType
=>
CorpusId
->
Int
...
...
@@ -362,125 +323,3 @@ queryNgramsOnlyByContextUser = [sql|
AND nc.category > 0
GROUP BY ng.terms, cng.context_id
|]
-- | For one document, map each of the requested ngrams terms to the set of
-- node ids returned by 'selectNgramsOnlyByDocUser'.
--
-- The terms are queried in batches of at most 1000 so that a single SQL
-- statement never carries an unbounded @VALUES@ list; the per-batch maps are
-- then merged, taking the union of the sets when a term appears in more than
-- one batch.
getNgramsByDocOnlyUser
  :: DocId
  -> [ListId]
  -> NgramsType
  -> [NgramsTerm]
  -> DBCmd err (HashMap NgramsTerm (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = do
  batches <- mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
  pure $ HM.unionsWith (<>) (map batchToMap batches)
  where
    -- Turn the rows of one batch into a map, accumulating node ids per term.
    batchToMap rows =
      HM.fromListWith (<>) [ (term, Set.singleton nodeId) | (term, nodeId) <- rows ]
-- | Run 'queryNgramsOnlyByDocUser' for one batch of terms.
--
-- The query receives, in order:
--
--   1. the terms, as a one-column @VALUES@ table typed @text@;
--   2. the list ids, as a one-column @VALUES@ table typed @int4@;
--   3. the document id;
--   4. the database id of the ngrams type.
--
-- The raw @terms@ column of each returned row is re-wrapped in 'NgramsTerm'.
selectNgramsOnlyByDocUser
  :: DocId
  -> [ListId]
  -> NgramsType
  -> [NgramsTerm]
  -> DBCmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms = do
  rows <- runPGSQuery queryNgramsOnlyByDocUser (termTable, listTable, dId, toDBid nt)
  pure [ (NgramsTerm rawTerm, nodeId) | (rawTerm, nodeId) <- rows ]
  where
    termTable =
      Values [QualifiedIdentifier Nothing "text"]
             (map (DPS.Only . unNgramsTerm) tms)
    listTable =
      Values [QualifiedIdentifier Nothing "int4"]
             (map (DPS.Only . DPS.toField) ls)
-- | SQL used by 'selectNgramsOnlyByDocUser'.
--
-- Placeholders, in order: the @input_rows@ table of terms, the @input_list@
-- table of ids (joined against @cng.context_id@), the value compared with
-- @cng.node_id@, and the ngrams type id.
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, cng.node_id FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = cng.context_id
WHERE cng.node_id = ? -- DocId
AND cng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, cng.node_id
|]
------------------------------------------------------------------------
-- | Build a map from ngrams text to the set of ids returned by
-- 'selectNgramsByContextMaster', merged over every page.
--
-- Pages are requested for the offsets @0, 500 .. 10000@, each call being
-- given @1000@ as its first argument. The result sets of all pages are
-- unioned per term.
--
-- TODO filter by language, database, any social field
getContextsByNgramsMaster
  :: HasDBid NodeType
  => UserCorpusId
  -> MasterCorpusId
  -> DBCmd err (HashMap Text (Set NodeId))
getContextsByNgramsMaster ucId mcId = do
  pages <- mapM (selectNgramsByContextMaster 1000 ucId mcId) [0, 500 .. 10000]
  -- Earlier drafts stopped early on short pages; kept here for reference:
  --   takeWhile (not . List.null)
  --   takeWhile (\l -> List.length l > 3)
  pure $ unionsWith (<>) (map pageToMap pages)
  where
    -- Rows arrive as (id, term); the map is keyed by term.
    pageToMap rows =
      HM.fromListWith (<>) [ (t, Set.singleton n) | (n, t) <- rows ]
-- | Run 'queryNgramsByContextMaster'' with its ten positional parameters.
--
-- @n@ is passed to @TABLESAMPLE SYSTEM_ROWS@; @p@ is passed to both
-- @node_pos@ comparisons of the user-corpus sub-query.
--
-- NOTE(review): @p@ is bound to both the @>= ?@ and the @< ?@ bound, which
-- makes the position window @[p, p)@ — that looks empty. The intended upper
-- bound is not visible from this function; confirm before relying on it.
selectNgramsByContextMaster
  :: HasDBid NodeType
  => Int
  -> UserCorpusId
  -> MasterCorpusId
  -> Int
  -> DBCmd err [(NodeId, Text)]
selectNgramsByContextMaster n ucId mcId p =
  runPGSQuery queryNgramsByContextMaster' params
  where
    docTypeId   = toDBid NodeDocument
    termsTypeId = toDBid NgramsTerms
    -- The order below must match the order of the @?@ placeholders.
    params =
      ( ucId
      , termsTypeId
      , docTypeId
      , p
      , docTypeId
      , p
      , n
      , mcId
      , docTypeId
      , termsTypeId
      )
-- | SQL used by 'selectNgramsByContextMaster'.
--
-- Two common table expressions are built:
--
--   * @contextsByNgramsUser@: (context id, term) pairs of the user corpus,
--     restricted to a @node_pos@ window and to @category > 0@;
--   * @contextsByNgramsMaster@: (context id, term) pairs sampled from the
--     children of the master corpus.
--
-- The final statement right-joins the master pairs onto the user pairs by id.
-- It previously selected from @nodesByNgramsMaster@, a name that is not
-- defined anywhere in this statement; it now reads from the
-- @contextsByNgramsMaster@ CTE declared above.
--
-- NOTE(review): the commented-out @n.typename = ?@ line contains a @?@; the
-- parameter count (ten) only matches if the quasi-quoter drops SQL comments —
-- confirm against the postgresql-simple version in use.
--
-- TODO fix context_node_ngrams relation
queryNgramsByContextMaster' :: DPS.Query
queryNgramsByContextMaster' = [sql|
WITH contextsByNgramsUser AS (
SELECT n.id, ng.terms FROM contexts n
JOIN nodes_contexts nn ON n.id = nn.context_id
JOIN context_node_ngrams cng ON cng.context_id = n.id
JOIN ngrams ng ON cng.ngrams_id = ng.id
WHERE nn.node_id = ? -- UserCorpusId
-- AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
AND node_pos(n.id,?) >= ?
AND node_pos(n.id,?) < ?
GROUP BY n.id, ng.terms
),
contextsByNgramsMaster AS (
SELECT n.id, ng.terms FROM contexts n TABLESAMPLE SYSTEM_ROWS(?)
JOIN context_node_ngrams cng ON n.id = cng.context_id
JOIN ngrams ng ON ng.id = cng.ngrams_id
WHERE n.parent_id = ? -- Master Corpus toDBid
AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId
GROUP BY n.id, ng.terms
)
SELECT m.id, m.terms FROM contextsByNgramsMaster m
RIGHT JOIN contextsByNgramsUser u ON u.id = m.id
|]
-- | Refreshes the \"context_node_ngrams_view\" materialized view.
-- This function will be run :
-- - periodically
-- - at reindex stage
-- - at the end of each text flow
-- refreshNgramsMaterialized :: Cmd err ()
-- refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
-- where
-- refreshNgramsMaterializedQuery :: DPS.Query
-- refreshNgramsMaterializedQuery =
-- [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
src/Gargantext/Database/Action/Search.hs
View file @
2a514b43
...
...
@@ -17,7 +17,6 @@ module Gargantext.Database.Action.Search (
searchInCorpus
,
searchInCorpusWithContacts
,
searchCountInCorpus
,
searchInCorpusWithNgrams
,
searchDocInDatabase
)
where
...
...
@@ -34,7 +33,6 @@ import Data.Time (UTCTime)
import
Gargantext.Core
(
Lang
(
EN
),
HasDBid
(
toDBid
)
)
import
Gargantext.Core.Text.Corpus.Query
qualified
as
API
import
Gargantext.Core.Text.Terms.Mono.Stem
(
stem
,
StemmingAlgorithm
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Types.Query
(
IsTrash
,
Limit
,
Offset
)
import
Gargantext.Database.Admin.Types.Hyperdata.Contact
(
HyperdataContact
(
..
)
)
...
...
@@ -154,20 +152,6 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
returnA
-<
(
_ns_id
row
,
_ns_hyperdata
row
)
------------------------------------------------------------------------
-- | Search ngrams in documents, ranking them by TF-IDF. We narrow our
-- search only to map/candidate terms.
--
-- NOTE: this is a stub. All eight arguments are ignored and the body is
-- 'undefined', so evaluating the result of any call fails at run time.
searchInCorpusWithNgrams
  :: HasDBid NodeType
  => CorpusId
  -> ListId
  -> IsTrash
  -> NgramsType
  -> [[Text]]
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> DBCmd err [FacetDoc]
searchInCorpusWithNgrams _ _ _ _ _ _ _ _ = undefined
-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
-- case only the "TF" part makes sense and so we only compute the
-- ratio of "number of times our terms appear in given document" and
...
...
src/Gargantext/Database/Admin/Config.hs
View file @
2a514b43
...
...
@@ -34,9 +34,6 @@ corpusMasterName = "Main"
-- | The user name @"gargantua"@.
-- NOTE(review): presumably the account owning the master corpus — confirm
-- against callers.
userMaster :: Text
userMaster = "gargantua"
-- | The user name @"user1"@.
-- NOTE(review): presumably a placeholder account for tests or examples —
-- confirm against callers.
userArbitrary :: Text
userArbitrary = "user1"
instance
HasDBid
NodeType
where
toDBid
n
=
nodeTypes
Bimap
.!
n
-- nodeTypes is total, this cannot fail by construction
lookupDBid
i
=
Bimap
.
lookupR
i
nodeTypes
...
...
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
View file @
2a514b43
...
...
@@ -11,7 +11,7 @@ Triggers on NodesNodes table.
-}
{-# LANGUAGE QuasiQuotes
#-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Admin.Trigger.NodesContexts
where
...
...
@@ -141,103 +141,3 @@ triggerUpdateDel lId = execPGSQuery query (lId, toDBid NodeList)
WHEN (OLD.category >= 1 AND NEW.category <= 0)
EXECUTE PROCEDURE set_update_ngrams_count_del();
|]
-- | Install the @set_delete_count()@ PL/pgSQL function and the
-- @trigger_delete_count@ statement-level trigger on @nodes_contexts@.
--
-- After a @DELETE@ on @nodes_contexts@, the function subtracts from
-- @context_node_ngrams.weight@ the number of deleted rows counted per
-- (context, list, ngrams, ngrams type).
--
-- The statement takes two parameters: the master list id (first element of
-- the @nnn.node_id in (?, lists.id)@ test) and the database id of 'NodeList'
-- (compared with @lists.typename@).
triggerDeleteCount :: MasterListId -> DBCmd err Int64
triggerDeleteCount lId = execPGSQuery installDeleteCount queryArgs
  where
    queryArgs = (lId, toDBid NodeList)

    installDeleteCount :: DPS.Query
    installDeleteCount = [sql|
CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
BEGIN
UPDATE context_node_ngrams SET weight = weight - d.delete_count
FROM ( SELECT lists.id as node_id
, old1.context_id as context_id
, nnn.ngrams_id as ngrams_id
, nnn.ngrams_type as ngrams_type
, count(*) as delete_count FROM OLD as old1
INNER JOIN contexts doc ON doc.id = old1.context_id
INNER JOIN nodes lists ON lists.parent_id = old1.node_id
INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
WHERE nnn.node_id in (?, lists.id)
AND lists.typename = ?
GROUP BY old1.context_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) AS d
WHERE context_node_ngrams.context_id = d.context_id
AND context_node_ngrams.node_id = d.node_id
AND context_node_ngrams.ngrams_id = d.ngrams_id
AND context_node_ngrams.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_delete_count on nodes_nodes;
CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_contexts
REFERENCING OLD TABLE AS OLD
FOR EACH STATEMENT
EXECUTE PROCEDURE set_delete_count();
|]
-- TODO add groups
{-
triggerCoocInsert :: MasterListId -> Cmd err Int64
triggerCoocInsert lid = execPGSQuery query ( lid
-- , nodeTypeId NodeCorpus
-- , nodeTypeId NodeDocument
-- , nodeTypeId NodeList
, toDBid CandidateTerm
, toDBid CandidateTerm
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION nodes_nodes_set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT new1.node1_id, nn1.id, nn2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nn1
ON nn1.node_id = ? -- COALESCE(?,?) --(masterList, userList)
INNER JOIN node_ngrams nn2
ON nn2.node_id = nn1.node_id
INNER JOIN node_node_ngrams2 nnn1
ON nnn1.node_id = new1.node2_id
INNER JOIN node_node_ngrams2 nnn2
ON nnn2.node_id = new1.node2_id
WHERE nnn1.nodengrams_id = nn1.id
AND nnn2.nodengrams_id = nn2.id
AND nn1.id < nn2.id
AND nn1.node_subtype >= ?
AND nn2.node_subtype >= ?
GROUP BY new1.node1_id, nn1.id, nn2.id
)
SELECT * from input where weight >= 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE nodes_nodes_set_cooc();
|]
-}
src/Gargantext/Database/Admin/Types/Hyperdata/Contact.hs
View file @
2a514b43
...
...
@@ -70,20 +70,6 @@ data ContactMetaData =
,
_cm_lastValidation
::
Maybe
Text
-- TODO UTCTIME
}
deriving
(
Eq
,
Show
,
Generic
)
-- | A 'ContactMetaData' filled with placeholder strings: @"bdd"@ for the
-- first field and @"TODO UTCTime"@ for the last-validation field.
defaultContactMetaData :: ContactMetaData
defaultContactMetaData = ContactMetaData firstField lastValidation
  where
    firstField     = Just "bdd"
    lastValidation = Just "TODO UTCTime"
-- | An empty 'HyperdataContact': every optional field is 'Nothing' and the
-- @_hc_where@ list is empty.
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact =
  HyperdataContact
    { -- optional fields, all unset
      _hc_bdd            = Nothing
    , _hc_who            = Nothing
    , _hc_title          = Nothing
    , _hc_source         = Nothing
    , _hc_lastValidation = Nothing
      -- list-valued field, empty
    , _hc_where          = []
    }
data
ContactWho
=
ContactWho
{
_cw_id
::
Maybe
Text
,
_cw_firstName
::
Maybe
Text
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/CorpusField.hs
View file @
2a514b43
...
...
@@ -57,8 +57,6 @@ data HyperdataField a =
,
_hf_name
::
!
Text
,
_hf_data
::
!
a
}
deriving
(
Generic
,
Show
)
-- | A 'HyperdataField' built from the 'Markdown' constructor, the name
-- @"name"@ and 'defaultCorpusField' as its data.
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField =
  HyperdataField Markdown fieldName defaultCorpusField
  where
    fieldName = "name"
------------------------------------------------------------------------
-- Instances
...
...
src/Gargantext/Database/Admin/Types/Hyperdata/Frame.hs
View file @
2a514b43
...
...
@@ -69,9 +69,3 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
_
<-
Wreq
.
headWith
Wreq
.
defaults
$
T
.
unpack
path
r
<-
Wreq
.
get
$
T
.
unpack
path
pure
$
decodeUtf8
$
toStrict
$
r
^.
Wreq
.
responseBody
-- | Download the export of a frame and return its body as text.
--
-- The URL is @\<_hf_base\>/\<_hf_frame_id\>.csv@; the response body is
-- converted to a strict byte string and decoded with 'decodeUtf8'.
--
-- NOTE(review): the function name says TSV but the requested resource ends
-- in @.csv@ — confirm which format the remote service actually returns.
getHyperdataFrameTSV :: HyperdataFrame -> IO Text
getHyperdataFrameTSV (HyperdataFrame { _hf_base, _hf_frame_id }) =
  decodeBody <$> Wreq.get (T.unpack csvUrl)
  where
    csvUrl = T.concat [_hf_base, "/", _hf_frame_id, ".csv"]
    decodeBody response = decodeUtf8 (toStrict (response ^. Wreq.responseBody))
src/Gargantext/Database/GargDB.hs
View file @
2a514b43
...
...
@@ -60,10 +60,6 @@ type GargFilePath = (FolderPath, FileName)
-- | Folder component of a 'GargFilePath'; a plain 'FilePath' alias.
type FolderPath = FilePath

-- | File-name component of a 'GargFilePath'; a plain 'FilePath' alias.
type FileName = FilePath
--------------------------------
-- | Derive a 'GargFilePath' from a value: the value is converted with
-- 'toJSON', rendered with 'Prelude.show', hashed with @hash@ and the digest
-- is turned into a path by @toPath@.
dataFilePath :: (ToJSON a) => a -> GargFilePath
dataFilePath value = toPath (hash (Prelude.show (toJSON value)))
randomFilePath
::
(
MonadReader
env
m
,
MonadBase
IO
m
...
...
@@ -154,22 +150,6 @@ rmFile :: ( MonadReader env m
=>
FilePath
->
m
()
rmFile
=
onDisk_1
SD
.
removeFile
-- | Copy a file: 'SD.copyFile' lifted through @onDisk_2@, applied to the
-- two given paths in the order (source, destination).
cpFile
  :: (MonadReader env m, MonadBase IO m, HasConfig env)
  => FilePath
  -> FilePath
  -> m ()
cpFile source destination = onDisk_2 SD.copyFile source destination
---
-- | Move a file by copying @fp1@ to @fp2@ with 'cpFile' and then removing
-- @fp1@ with 'rmFile'.
--
-- The two steps run one after the other; if the removal step fails the copy
-- made by the first step is not undone here.
mvFile
  :: (MonadReader env m, MonadBase IO m, HasConfig env)
  => FilePath
  -> FilePath
  -> m ()
mvFile fp1 fp2 = cpFile fp1 fp2 >> rmFile fp1
------------------------------------------------------------------------
onDisk_1
::
(
MonadReader
env
m
,
MonadBase
IO
m
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment