Verified commit 142798f2, authored by Przemyslaw Kaminski

Merge branch '186-dev-ngrams-score-fixes' into 175-dev-doc-table-count

parents f61a1958 7cfb1f04
...@@ -28,6 +28,7 @@ import Gargantext.Database.Prelude (Cmd, runOpaQuery, runCountOpaQuery) ...@@ -28,6 +28,7 @@ import Gargantext.Database.Prelude (Cmd, runOpaQuery, runCountOpaQuery)
import Gargantext.Database.Query.Facet import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Table.Context import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable) import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
import Gargantext.Database.Query.Table.NodeContext import Gargantext.Database.Query.Table.NodeContext
...@@ -77,10 +78,11 @@ searchInCorpusWithNgrams _cId _lId _t _ngt _q _o _l _order = undefined ...@@ -77,10 +78,11 @@ searchInCorpusWithNgrams _cId _lId _t _ngt _q _o _l _order = undefined
-- ratio of "number of times our terms appear in given document" and -- ratio of "number of times our terms appear in given document" and
-- "number of all terms in document" and return a sorted list of -- "number of all terms in document" and return a sorted list of
-- document ids -- document ids
tfidfAll :: CorpusId -> [Int] -> Cmd err [Int] tfidfAll :: (HasDBid NodeType, HasNodeError err) => CorpusId -> [Int] -> Cmd err [Int]
tfidfAll cId ngramIds = do tfidfAll cId ngramIds = do
let ngramIdsSet = Set.fromList ngramIds let ngramIdsSet = Set.fromList ngramIds
docsWithNgrams <- runOpaQuery (queryCorpusWithNgrams cId ngramIds) :: Cmd err [(Int, Int, Int)] lId <- defaultList cId
docsWithNgrams <- runOpaQuery (queryListWithNgrams lId ngramIds) :: Cmd err [(Int, Int, Int)]
-- NOTE The query returned docs with ANY ngramIds. We need to further -- NOTE The query returned docs with ANY ngramIds. We need to further
-- restrict to ALL ngramIds. -- restrict to ALL ngramIds.
let docsNgramsM = let docsNgramsM =
...@@ -108,14 +110,14 @@ tfidfAll cId ngramIds = do ...@@ -108,14 +110,14 @@ tfidfAll cId ngramIds = do
-- | Query for searching the 'context_node_ngrams' table so that we -- | Query for searching the 'context_node_ngrams' table so that we
-- find docs with ANY given 'ngramIds'. -- find docs with ANY given 'ngramIds'.
queryCorpusWithNgrams :: CorpusId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4) queryListWithNgrams :: ListId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4)
queryCorpusWithNgrams cId ngramIds = proc () -> do queryListWithNgrams lId ngramIds = proc () -> do
row <- queryContextNodeNgramsTable -< () row <- queryContextNodeNgramsTable -< ()
restrict -< (_cnng_node_id row) .== (pgNodeId cId) restrict -< (_cnng_node_id row) .== (pgNodeId lId)
restrict -< in_ (sqlInt4 <$> ngramIds) (_cnng_ngrams_id row) restrict -< in_ (sqlInt4 <$> ngramIds) (_cnng_ngrams_id row)
returnA -< ( _cnng_context_id row returnA -< ( _cnng_context_id row
, _cnng_ngrams_id row , _cnng_ngrams_id row
, _cnng_doc_count row) , _cnng_doc_count row )
--returnA -< row --returnA -< row
-- returnA -< ( _cnng_context_id row -- returnA -< ( _cnng_context_id row
-- , _cnng_node_id row -- , _cnng_node_id row
......
...@@ -168,7 +168,18 @@ getContextsForNgramsTerms cId ngramsTerms = do ...@@ -168,7 +168,18 @@ getContextsForNgramsTerms cId ngramsTerms = do
query :: PGS.Query query :: PGS.Query
query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
FROM ( FROM (
SELECT DISTINCT ON (contexts.id) contexts.id AS id, hash_id, typename, user_id, parent_id, name, date, hyperdata, nodes_contexts.score AS score, nodes_contexts.category AS category,context_node_ngrams.doc_count AS doc_count SELECT DISTINCT ON (contexts.id)
contexts.id AS id,
hash_id,
typename,
user_id,
parent_id,
name,
date,
hyperdata,
nodes_contexts.score AS score,
nodes_contexts.category AS category,
context_node_ngrams.doc_count AS doc_count
FROM contexts FROM contexts
JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment