Commit 2a514b43 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Removed some more dead code

parent 88b8e657
...@@ -31,7 +31,7 @@ import Gargantext.Core ( HasDBid(toDBid) ) ...@@ -31,7 +31,7 @@ import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith ) import Gargantext.Data.HashMap.Strict.Utils as HM ( unionsWith )
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId (..), NodeType(NodeDocument))
import Gargantext.Database.Prelude (DBCmd, runPGSQuery) import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams () -- toDBid instance import Gargantext.Database.Schema.Ngrams () -- toDBid instance
import Gargantext.Prelude import Gargantext.Prelude
...@@ -182,25 +182,6 @@ getOccByNgramsOnlyFast cId lId nt = do ...@@ -182,25 +182,6 @@ getOccByNgramsOnlyFast cId lId nt = do
FROM ns FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
|] |]
-- query = [sql|
-- WITH node_context_ids AS
-- (select context_id, ngrams_id
-- FROM context_node_ngrams_view
-- WHERE node_id = ?
-- ), ns AS
-- (select ngrams_id FROM node_stories
-- WHERE node_id = ? AND ngrams_type_id = ?
-- )
-- SELECT ng.terms,
-- ARRAY ( SELECT DISTINCT context_id
-- FROM node_context_ids
-- WHERE ns.ngrams_id = node_context_ids.ngrams_id
-- )
-- AS context_ids
-- FROM ngrams ng
-- JOIN ns ON ng.id = ns.ngrams_id
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
...@@ -245,26 +226,6 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql| ...@@ -245,26 +226,6 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
|] |]
-- queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
-- queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
-- WITH nodes_sample AS (SELECT c.id FROM contexts c TABLESAMPLE SYSTEM_ROWS (?)
-- JOIN nodes_contexts nc ON c.id = nc.context_id
-- WHERE c.typename = ?
-- AND nc.node_id = ?),
-- input_rows(terms) AS (?)
-- SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
-- JOIN ngrams ng ON cng.ngrams_id = ng.id
-- JOIN input_rows ir ON ir.terms = ng.terms
-- JOIN nodes_contexts nc ON nc.context_id = cng.context_id
-- JOIN nodes_sample ns ON nc.context_id = ns.id
-- WHERE nc.node_id = ? -- CorpusId
-- AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nc.category > 0
-- -- AND nc.context_id IN (SELECT id FROM nodes_sample)
-- GROUP BY cng.node_id, ng.terms
-- |]
selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample' :: HasDBid NodeType
=> CorpusId => CorpusId
-> Int -> Int
...@@ -362,125 +323,3 @@ queryNgramsOnlyByContextUser = [sql| ...@@ -362,125 +323,3 @@ queryNgramsOnlyByContextUser = [sql|
AND nc.category > 0 AND nc.category > 0
GROUP BY ng.terms, cng.context_id GROUP BY ng.terms, cng.context_id
|] |]
-- | For one document, map each of the given terms to the set of node ids
-- returned for it by 'selectNgramsOnlyByDocUser'.
--
-- The term list is cut into chunks with @splitEvery 1000@ and one query is
-- issued per chunk. Each result is turned into a map of singleton sets, and
-- the per-chunk maps are merged with set union.
getNgramsByDocOnlyUser :: DocId
                       -> [ListId]
                       -> NgramsType
                       -> [NgramsTerm]
                       -> DBCmd err (HashMap NgramsTerm (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = do
  batches <- mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
  pure $ HM.unionsWith (<>) (map indexBatch batches)
  where
    -- One query result as a term-indexed map; duplicate terms are unioned.
    indexBatch rows =
      HM.fromListWith (<>) [ (term, Set.singleton nodeId) | (term, nodeId) <- rows ]
-- | Run 'queryNgramsOnlyByDocUser' for one batch of terms and wrap the
-- returned term text back into 'NgramsTerm'.
--
-- Query parameters, in placeholder order:
--
--   1. the terms, as a @VALUES@ list of @text@
--   2. the list ids, as a @VALUES@ list of @int4@
--   3. the document id
--   4. the database id of the ngrams type
selectNgramsOnlyByDocUser :: DocId
                          -> [ListId]
                          -> NgramsType
                          -> [NgramsTerm]
                          -> DBCmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms = do
  rows <- runPGSQuery queryNgramsOnlyByDocUser (termRows, listRows, dId, toDBid nt)
  pure (map (first NgramsTerm) rows)
  where
    termRows = Values [QualifiedIdentifier Nothing "text"]
                      (map (DPS.Only . unNgramsTerm) tms)
    listRows = Values [QualifiedIdentifier Nothing "int4"]
                      (map (DPS.Only . DPS.toField) ls)
-- | SQL behind 'selectNgramsOnlyByDocUser'.
--
-- Takes four placeholders, in order: the input terms (a @VALUES@ list bound
-- to @input_rows@), the input ids (a @VALUES@ list bound to @input_list@),
-- the value compared with @cng.node_id@, and the ngrams type id.
-- Returns one @(terms, node_id)@ row per distinct pair (via @GROUP BY@).
--
-- NOTE(review): the ids in @input_list@ are joined against
-- @cng.context_id@ while the single id filters @cng.node_id@; confirm this
-- is the intended orientation given the caller passes list ids and a DocId.
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, cng.node_id FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = cng.context_id
WHERE cng.node_id = ? -- DocId
AND cng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, cng.node_id
|]
------------------------------------------------------------------------
-- | Map each term to the set of node ids returned for it by
-- 'selectNgramsByContextMaster'.
--
-- The query is run once for every value in @[0,500..10000]@ (passed as its
-- last argument) with a fixed first argument of 1000. Each result is turned
-- into a term-indexed map of singleton sets, and the maps are merged with
-- set union.
--
-- TODO filter by language, database, any social field
getContextsByNgramsMaster :: HasDBid NodeType
                          => UserCorpusId
                          -> MasterCorpusId
                          -> DBCmd err (HashMap Text (Set NodeId))
getContextsByNgramsMaster ucId mcId = do
  pages <- mapM (selectNgramsByContextMaster 1000 ucId mcId) [0,500..10000]
  pure $ unionsWith (<>) (map indexPage pages)
  where
    -- Rows arrive as (node id, term); the map is keyed by term.
    indexPage rows =
      HM.fromListWith (<>) [ (term, Set.singleton nodeId) | (nodeId, term) <- rows ]
-- | Run 'queryNgramsByContextMaster'' with its ten positional parameters.
--
-- Arguments: @n@ is bound to the @TABLESAMPLE SYSTEM_ROWS@ placeholder,
-- @ucId@ and @mcId@ to the user-corpus and master-corpus placeholders, and
-- @p@ to both @node_pos@ comparison placeholders.
--
-- NOTE(review): @p@ is supplied for both the @>=@ and the @<@ bound of the
-- @node_pos@ window, which makes the window empty unless the upper bound was
-- meant to be a different value. Confirm the intended upper bound.
selectNgramsByContextMaster :: HasDBid NodeType
                            => Int
                            -> UserCorpusId
                            -> MasterCorpusId
                            -> Int
                            -> DBCmd err [(NodeId, Text)]
selectNgramsByContextMaster n ucId mcId p =
  runPGSQuery queryNgramsByContextMaster' params
  where
    docTypeId   = toDBid NodeDocument
    termsTypeId = toDBid NgramsTerms
    -- Order must follow the placeholders of the query exactly.
    params =
      ( ucId
      , termsTypeId
      , docTypeId
      , p
      , docTypeId
      , p
      , n
      , mcId
      , docTypeId
      , termsTypeId
      )
-- | SQL behind 'selectNgramsByContextMaster'.
--
-- Two CTEs are built:
--
--   * @contextsByNgramsUser@: distinct @(context id, term)@ pairs for
--     contexts linked to the given node through @nodes_contexts@ with
--     @category > 0@, of the given ngrams type, whose @node_pos@ falls in
--     the requested window.
--   * @contextsByNgramsMaster@: distinct @(context id, term)@ pairs taken
--     from a @TABLESAMPLE SYSTEM_ROWS@ sample of @contexts@ having the given
--     parent and typename, of the given ngrams type.
--
-- The final @SELECT@ right-joins the master pairs onto the user pairs by
-- context id and returns the master side's @(id, terms)@.
--
-- Placeholders, in order (10): user corpus id, ngrams type id,
-- @node_pos@ argument, lower bound, @node_pos@ argument, upper bound,
-- sample size, master corpus id, typename id, ngrams type id.
-- The commented-out @n.typename@ line relies on the @sql@ quasi-quoter
-- stripping @--@ comments, so its @?@ is not counted.
--
-- Fix: the final @SELECT@ used to read from @nodesByNgramsMaster@, a name
-- this query never defines; it now reads from the @contextsByNgramsMaster@
-- CTE declared above.
--
-- NOTE(review): because of the @RIGHT JOIN@, user rows without a master
-- match yield NULL for @m.id@ / @m.terms@; confirm the caller's row type
-- can decode that.
--
-- TODO fix context_node_ngrams relation
queryNgramsByContextMaster' :: DPS.Query
queryNgramsByContextMaster' = [sql|
  WITH contextsByNgramsUser AS (
    SELECT n.id, ng.terms FROM contexts n
      JOIN nodes_contexts nn ON n.id = nn.context_id
      JOIN context_node_ngrams cng ON cng.context_id = n.id
      JOIN ngrams ng ON cng.ngrams_id = ng.id
    WHERE nn.node_id = ? -- UserCorpusId
      -- AND n.typename = ? -- toDBid
      AND cng.ngrams_type = ? -- NgramsTypeId
      AND nn.category > 0
      AND node_pos(n.id,?) >= ?
      AND node_pos(n.id,?) < ?
    GROUP BY n.id, ng.terms
  ),
  contextsByNgramsMaster AS (
    SELECT n.id, ng.terms FROM contexts n TABLESAMPLE SYSTEM_ROWS(?)
      JOIN context_node_ngrams cng ON n.id = cng.context_id
      JOIN ngrams ng ON ng.id = cng.ngrams_id
    WHERE n.parent_id = ? -- Master Corpus toDBid
      AND n.typename = ? -- toDBid
      AND cng.ngrams_type = ? -- NgramsTypeId
    GROUP BY n.id, ng.terms
  )
  SELECT m.id, m.terms FROM contextsByNgramsMaster m
    RIGHT JOIN contextsByNgramsUser u ON u.id = m.id
|]
-- | Refreshes the "context_node_ngrams_view" materialized view.
-- This function will be run:
-- - periodically
-- - at reindex stage
-- - at the end of each text flow
-- refreshNgramsMaterialized :: Cmd err ()
-- refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
-- where
-- refreshNgramsMaterializedQuery :: DPS.Query
-- refreshNgramsMaterializedQuery =
-- [sql| REFRESH MATERIALIZED VIEW CONCURRENTLY context_node_ngrams_view; |]
...@@ -17,7 +17,6 @@ module Gargantext.Database.Action.Search ( ...@@ -17,7 +17,6 @@ module Gargantext.Database.Action.Search (
searchInCorpus searchInCorpus
, searchInCorpusWithContacts , searchInCorpusWithContacts
, searchCountInCorpus , searchCountInCorpus
, searchInCorpusWithNgrams
, searchDocInDatabase , searchDocInDatabase
) where ) where
...@@ -34,7 +33,6 @@ import Data.Time (UTCTime) ...@@ -34,7 +33,6 @@ import Data.Time (UTCTime)
import Gargantext.Core ( Lang(EN), HasDBid(toDBid) ) import Gargantext.Core ( Lang(EN), HasDBid(toDBid) )
import Gargantext.Core.Text.Corpus.Query qualified as API import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..)) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Types.Query (IsTrash, Limit, Offset) import Gargantext.Core.Types.Query (IsTrash, Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact(..) )
...@@ -154,20 +152,6 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) ...@@ -154,20 +152,6 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
returnA -< (_ns_id row, _ns_hyperdata row) returnA -< (_ns_id row, _ns_hyperdata row)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Intended to search ngrams in documents, ranking them by TF-IDF and
-- narrowing the search to map/candidate terms.
--
-- Currently a stub: every argument is ignored and the result is
-- 'undefined', so evaluating it fails at runtime.
searchInCorpusWithNgrams :: HasDBid NodeType
                         => CorpusId       -- ^ corpus (ignored)
                         -> ListId         -- ^ list (ignored)
                         -> IsTrash        -- ^ trash flag (ignored)
                         -> NgramsType     -- ^ ngrams type (ignored)
                         -> [[Text]]       -- ^ query (ignored)
                         -> Maybe Offset   -- ^ offset (ignored)
                         -> Maybe Limit    -- ^ limit (ignored)
                         -> Maybe OrderBy  -- ^ ordering (ignored)
                         -> DBCmd err [FacetDoc]
searchInCorpusWithNgrams _ _ _ _ _ _ _ _ = undefined
-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this -- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
-- case only the "TF" part makes sense and so we only compute the -- case only the "TF" part makes sense and so we only compute the
-- ratio of "number of times our terms appear in given document" and -- ratio of "number of times our terms appear in given document" and
......
...@@ -34,9 +34,6 @@ corpusMasterName = "Main" ...@@ -34,9 +34,6 @@ corpusMasterName = "Main"
userMaster :: Text userMaster :: Text
userMaster = "gargantua" userMaster = "gargantua"
-- | Fixed user name @"user1"@.
userArbitrary :: Text
userArbitrary = "user1"
instance HasDBid NodeType where instance HasDBid NodeType where
toDBid n = nodeTypes Bimap.! n -- nodeTypes is total, this cannot fail by construction toDBid n = nodeTypes Bimap.! n -- nodeTypes is total, this cannot fail by construction
lookupDBid i = Bimap.lookupR i nodeTypes lookupDBid i = Bimap.lookupR i nodeTypes
......
...@@ -11,7 +11,7 @@ Triggers on NodesNodes table. ...@@ -11,7 +11,7 @@ Triggers on NodesNodes table.
-} -}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Admin.Trigger.NodesContexts module Gargantext.Database.Admin.Trigger.NodesContexts
where where
...@@ -141,103 +141,3 @@ triggerUpdateDel lId = execPGSQuery query (lId, toDBid NodeList) ...@@ -141,103 +141,3 @@ triggerUpdateDel lId = execPGSQuery query (lId, toDBid NodeList)
WHEN (OLD.category >= 1 AND NEW.category <= 0) WHEN (OLD.category >= 1 AND NEW.category <= 0)
EXECUTE PROCEDURE set_update_ngrams_count_del(); EXECUTE PROCEDURE set_update_ngrams_count_del();
|] |]
-- | Install the @set_delete_count@ PL/pgSQL function and the
-- @trigger_delete_count@ trigger on @nodes_contexts@.
--
-- The trigger fires @AFTER DELETE@, once per statement, with the deleted
-- rows exposed as the transition table @OLD@. The function counts, per
-- (list, context, ngrams, ngrams type), the matching @context_node_ngrams@
-- rows and subtracts that count from the corresponding @weight@.
--
-- Query parameters, in order: the given list id (used in the
-- @nnn.node_id in (?, lists.id)@ filter) and the database id of 'NodeList'
-- (compared with @lists.typename@).
--
-- NOTE(review): the function is created with @CREATE OR REPLACE@ but the
-- trigger with a plain @CREATE TRIGGER@; presumably a second run fails if
-- the trigger already exists. Confirm how callers guard against that.
triggerDeleteCount :: MasterListId -> DBCmd err Int64
triggerDeleteCount lId = execPGSQuery query (lId, toDBid NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
BEGIN
UPDATE context_node_ngrams SET weight = weight - d.delete_count
FROM ( SELECT lists.id as node_id
, old1.context_id as context_id
, nnn.ngrams_id as ngrams_id
, nnn.ngrams_type as ngrams_type
, count(*) as delete_count FROM OLD as old1
INNER JOIN contexts doc ON doc.id = old1.context_id
INNER JOIN nodes lists ON lists.parent_id = old1.node_id
INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
WHERE nnn.node_id in (?, lists.id)
AND lists.typename = ?
GROUP BY old1.context_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) AS d
WHERE context_node_ngrams.context_id = d.context_id
AND context_node_ngrams.node_id = d.node_id
AND context_node_ngrams.ngrams_id = d.ngrams_id
AND context_node_ngrams.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_delete_count on nodes_nodes;
CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_contexts
REFERENCING OLD TABLE AS OLD
FOR EACH STATEMENT
EXECUTE PROCEDURE set_delete_count();
|]
-- TODO add groups
{-
triggerCoocInsert :: MasterListId -> Cmd err Int64
triggerCoocInsert lid = execPGSQuery query ( lid
-- , nodeTypeId NodeCorpus
-- , nodeTypeId NodeDocument
-- , nodeTypeId NodeList
, toDBid CandidateTerm
, toDBid CandidateTerm
)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION nodes_nodes_set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT new1.node1_id, nn1.id, nn2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nn1
ON nn1.node_id = ? -- COALESCE(?,?) --(masterList, userList)
INNER JOIN node_ngrams nn2
ON nn2.node_id = nn1.node_id
INNER JOIN node_node_ngrams2 nnn1
ON nnn1.node_id = new1.node2_id
INNER JOIN node_node_ngrams2 nnn2
ON nnn2.node_id = new1.node2_id
WHERE nnn1.nodengrams_id = nn1.id
AND nnn2.nodengrams_id = nn2.id
AND nn1.id < nn2.id
AND nn1.node_subtype >= ?
AND nn2.node_subtype >= ?
GROUP BY new1.node1_id, nn1.id, nn2.id
)
SELECT * from input where weight >= 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on nodes_nodes
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE nodes_nodes_set_cooc();
|]
-}
...@@ -70,20 +70,6 @@ data ContactMetaData = ...@@ -70,20 +70,6 @@ data ContactMetaData =
, _cm_lastValidation :: Maybe Text -- TODO UTCTIME , _cm_lastValidation :: Maybe Text -- TODO UTCTIME
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
-- | A 'ContactMetaData' filled with fixed placeholder strings:
-- @Just "bdd"@ for the first field and @Just "TODO UTCTime"@ for the second.
defaultContactMetaData :: ContactMetaData
defaultContactMetaData = ContactMetaData (Just "bdd") (Just "TODO UTCTime")
-- | An empty 'HyperdataContact': every optional field is 'Nothing' and
-- '_hc_where' is the empty list.
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact = HyperdataContact
  { _hc_bdd            = Nothing
  , _hc_who            = Nothing
  , _hc_where          = []
  , _hc_title          = Nothing
  , _hc_source         = Nothing
  , _hc_lastValidation = Nothing
  }
data ContactWho = data ContactWho =
ContactWho { _cw_id :: Maybe Text ContactWho { _cw_id :: Maybe Text
, _cw_firstName :: Maybe Text , _cw_firstName :: Maybe Text
......
...@@ -57,8 +57,6 @@ data HyperdataField a = ...@@ -57,8 +57,6 @@ data HyperdataField a =
, _hf_name :: !Text , _hf_name :: !Text
, _hf_data :: !a , _hf_data :: !a
} deriving (Generic, Show) } deriving (Generic, Show)
-- | A 'HyperdataField' built from the 'Markdown' constructor, the name
-- @"name"@ and 'defaultCorpusField' as its data.
defaultHyperdataField :: HyperdataField CorpusField
defaultHyperdataField = HyperdataField Markdown "name" defaultCorpusField
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Instances -- Instances
......
...@@ -69,9 +69,3 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do ...@@ -69,9 +69,3 @@ getHyperdataFrameContents (HyperdataFrame { _hf_base, _hf_frame_id }) = do
_ <- Wreq.headWith Wreq.defaults $ T.unpack path _ <- Wreq.headWith Wreq.defaults $ T.unpack path
r <- Wreq.get $ T.unpack path r <- Wreq.get $ T.unpack path
pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody pure $ decodeUtf8 $ toStrict $ r ^. Wreq.responseBody
-- | Download @\<base\>/\<frame id\>.csv@ with an HTTP GET and return the
-- response body decoded as UTF-8 text.
--
-- NOTE(review): the function is named TSV but the requested path ends in
-- @.csv@; confirm which format the endpoint actually serves.
getHyperdataFrameTSV :: HyperdataFrame -> IO Text
getHyperdataFrameTSV (HyperdataFrame { _hf_base, _hf_frame_id }) =
  bodyText <$> Wreq.get (T.unpack url)
  where
    url = _hf_base <> "/" <> _hf_frame_id <> ".csv"
    bodyText response = decodeUtf8 (toStrict (response ^. Wreq.responseBody))
...@@ -60,10 +60,6 @@ type GargFilePath = (FolderPath, FileName) ...@@ -60,10 +60,6 @@ type GargFilePath = (FolderPath, FileName)
type FolderPath = FilePath type FolderPath = FilePath
type FileName = FilePath type FileName = FilePath
--------------------------------
-- | Derive a 'GargFilePath' from a value: the value is converted with
-- 'toJSON', rendered with @Prelude.show@, hashed with @hash@, and the hash
-- is handed to @toPath@.
dataFilePath :: (ToJSON a) => a -> GargFilePath
dataFilePath value = toPath (hash rendered)
  where
    rendered = Prelude.show (toJSON value)
randomFilePath :: ( MonadReader env m randomFilePath :: ( MonadReader env m
, MonadBase IO m , MonadBase IO m
...@@ -154,22 +150,6 @@ rmFile :: ( MonadReader env m ...@@ -154,22 +150,6 @@ rmFile :: ( MonadReader env m
=> FilePath -> m () => FilePath -> m ()
rmFile = onDisk_1 SD.removeFile rmFile = onDisk_1 SD.removeFile
-- | Copy a file by running 'SD.copyFile' through 'onDisk_2'.
-- The first path is the source and the second the destination, as for
-- 'SD.copyFile'; how 'onDisk_2' resolves the two paths is defined elsewhere.
cpFile :: ( MonadReader env m
          , MonadBase IO m
          , HasConfig env
          )
       => FilePath -> FilePath -> m ()
cpFile source destination = onDisk_2 SD.copyFile source destination
---
-- | Move a file as two steps: copy @fp1@ to @fp2@ with 'cpFile', then
-- delete @fp1@ with 'rmFile'. The removal only runs after the copy action
-- has been sequenced; the two steps are not atomic.
mvFile :: ( MonadReader env m
          , MonadBase IO m
          , HasConfig env
          )
       => FilePath -> FilePath -> m ()
mvFile fp1 fp2 = cpFile fp1 fp2 >> rmFile fp1
------------------------------------------------------------------------ ------------------------------------------------------------------------
onDisk_1 :: ( MonadReader env m onDisk_1 :: ( MonadReader env m
, MonadBase IO m , MonadBase IO m
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment