Commit 87420e23 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/175-dev-doc-table-count' into dev-merge

parents 6554283e e3660357
......@@ -48,8 +48,9 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Learn (FavOrTrash(..), moreLike)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Facet (FacetDoc , runViewDocuments, runCountDocuments, OrderBy(..), runViewAuthorsDoc)
import Gargantext.Database.Prelude -- (Cmd, CmdM)
import Gargantext.Prelude
------------------------------------------------------------------------
......@@ -100,7 +101,8 @@ tableApi id' = getTableApi id'
:<|> getTableHashApi id'
getTableApi :: NodeId
getTableApi :: HasNodeError err
=> NodeId
-> Maybe TabType
-> Maybe ListId
-> Maybe Int
......@@ -115,14 +117,16 @@ getTableApi cId tabType _mListId mLimit mOffset mOrderBy mQuery mYear = do
t <- getTable cId tabType mOffset mLimit mOrderBy mQuery mYear
pure $ constructHashedResponse t
postTableApi :: NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi :: HasNodeError err
=> NodeId -> TableQuery -> Cmd err FacetTableResult
postTableApi cId (TableQuery o l order ft "") = getTable cId (Just ft) (Just o) (Just l) (Just order) Nothing Nothing
postTableApi cId (TableQuery o l order ft q) = case ft of
Docs -> searchInCorpus' cId False [q] (Just o) (Just l) (Just order)
Trash -> searchInCorpus' cId True [q] (Just o) (Just l) (Just order)
x -> panic $ "not implemented in tableApi " <> (cs $ show x)
getTableHashApi :: NodeId -> Maybe TabType -> Cmd err Text
getTableHashApi :: HasNodeError err
=> NodeId -> Maybe TabType -> Cmd err Text
getTableHashApi cId tabType = do
HashedResponse { hash = h } <- getTableApi cId tabType Nothing Nothing Nothing Nothing Nothing Nothing
pure h
......@@ -140,7 +144,8 @@ searchInCorpus' cId t q o l order = do
pure $ TableResult { tr_docs = docs, tr_count = countAllDocs }
getTable :: NodeId
getTable :: HasNodeError err
=> NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
......@@ -153,7 +158,8 @@ getTable cId ft o l order query year = do
docsCount <- runCountDocuments cId (ft == Just Trash) query year
pure $ TableResult { tr_docs = docs, tr_count = docsCount }
getTable' :: NodeId
getTable' :: HasNodeError err
=> NodeId
-> Maybe TabType
-> Maybe Offset
-> Maybe Limit
......
......@@ -19,9 +19,10 @@ import Data.Maybe
import Data.Text (Text)
import Gargantext.Core
import Gargantext.Core.Types (Offset, Limit)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude
import Gargantext.Core.Text.Learn
......@@ -32,7 +33,7 @@ data FavOrTrash = IsFav | IsTrash
deriving (Eq)
moreLike :: HasDBid NodeType
moreLike :: (HasDBid NodeType, HasNodeError err)
=> CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Cmd err [FacetDoc]
moreLike cId o _l order ft = do
......@@ -40,7 +41,8 @@ moreLike cId o _l order ft = do
moreLikeWith cId o (Just 3) order ft priors
---------------------------------------------------------------------------
getPriors :: HasDBid NodeType => FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors :: (HasDBid NodeType, HasNodeError err)
=> FavOrTrash -> CorpusId -> Cmd err (Events Bool)
getPriors ft cId = do
docs_fav <- filter (\(FacetDoc _ _ _ _ f _ _) -> f == Just 2)
......@@ -56,7 +58,7 @@ getPriors ft cId = do
pure priors
moreLikeWith :: HasDBid NodeType
moreLikeWith :: (HasDBid NodeType, HasNodeError err)
=> CorpusId -> Maybe Offset -> Maybe Limit -> Maybe OrderBy
-> FavOrTrash -> Events Bool -> Cmd err [FacetDoc]
moreLikeWith cId o l order ft priors = do
......
......@@ -28,6 +28,7 @@ import Gargantext.Database.Prelude (Cmd, runOpaQuery, runCountOpaQuery)
import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Table.ContextNodeNgrams (queryContextNodeNgramsTable)
import Gargantext.Database.Query.Table.NodeContext
......@@ -77,10 +78,11 @@ searchInCorpusWithNgrams _cId _lId _t _ngt _q _o _l _order = undefined
-- ratio of "number of times our terms appear in given document" and
-- "number of all terms in document" and return a sorted list of
-- document ids
tfidfAll :: CorpusId -> [Int] -> Cmd err [Int]
tfidfAll :: (HasDBid NodeType, HasNodeError err) => CorpusId -> [Int] -> Cmd err [Int]
tfidfAll cId ngramIds = do
let ngramIdsSet = Set.fromList ngramIds
docsWithNgrams <- runOpaQuery (queryCorpusWithNgrams cId ngramIds) :: Cmd err [(Int, Int, Int)]
lId <- defaultList cId
docsWithNgrams <- runOpaQuery (queryListWithNgrams lId ngramIds) :: Cmd err [(Int, Int, Int)]
-- NOTE The query returned docs with ANY ngramIds. We need to further
-- restrict to ALL ngramIds.
let docsNgramsM =
......@@ -108,14 +110,14 @@ tfidfAll cId ngramIds = do
-- | Query for searching the 'context_node_ngrams' table so that we
-- find docs with ANY given 'ngramIds'.
queryCorpusWithNgrams :: CorpusId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4)
queryCorpusWithNgrams cId ngramIds = proc () -> do
queryListWithNgrams :: ListId -> [Int] -> Select (Column SqlInt4, Column SqlInt4, Column SqlInt4)
queryListWithNgrams lId ngramIds = proc () -> do
row <- queryContextNodeNgramsTable -< ()
restrict -< (_cnng_node_id row) .== (pgNodeId cId)
restrict -< (_cnng_node_id row) .== (pgNodeId lId)
restrict -< in_ (sqlInt4 <$> ngramIds) (_cnng_ngrams_id row)
returnA -< ( _cnng_context_id row
, _cnng_ngrams_id row
, _cnng_doc_count row)
, _cnng_doc_count row )
--returnA -< row
-- returnA -< ( _cnng_context_id row
-- , _cnng_node_id row
......
......@@ -10,6 +10,7 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE TemplateHaskell #-}
......@@ -18,7 +19,7 @@ Portability : POSIX
module Gargantext.Database.Query.Facet
( runViewAuthorsDoc
, runViewDocuments
-- , viewDocuments'
, viewDocuments
, runCountDocuments
, filterWith
......@@ -40,22 +41,24 @@ import Control.Arrow (returnA)
import Control.Lens ((^.))
import qualified Data.Text as T
import Opaleye
import qualified Opaleye.Aggregate as OAgg
import Protolude hiding (null, map, sum, not)
import qualified Opaleye.Internal.Unpackspec()
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Facet.Types
import Gargantext.Database.Query.Table.ContextNodeNgrams
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.NodeContext (queryNodeContextTable)
import Gargantext.Database.Query.Facet.Types
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.NodeContext
-- import Gargantext.Prelude (printDebug)
------------------------------------------------------------------------
......@@ -82,7 +85,7 @@ viewAuthorsDoc cId _ nt = proc () -> do
--(doc,(_,(_,(_,contact')))) <- queryAuthorsDoc -< ()
(doc, _, _, _, contact') <- queryAuthorsDoc -< ()
restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId
restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId
restrict -< _node_typename doc .== sqlInt4 (toDBid nt)
returnA -< FacetDoc { facetDoc_id = _node_id doc
......@@ -114,7 +117,7 @@ queryAuthorsDoc = proc () -> do
------------------------------------------------------------------------
-- TODO-SECURITY check
runViewDocuments :: HasDBid NodeType
runViewDocuments :: (HasDBid NodeType, HasNodeError err)
=> CorpusId
-> IsTrash
-> Maybe Offset
......@@ -124,34 +127,67 @@ runViewDocuments :: HasDBid NodeType
-> Maybe Text
-> Cmd err [FacetDoc]
runViewDocuments cId t o l order query year = do
-- printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
runOpaQuery $ filterWith o l order sqlQuery
listId <- defaultList cId
res <- runOpaQuery $ filterWith' o l order (sqlQuery listId) :: Cmd err [FacetDocAgg']
pure $ remapNgramsCount <$> res
where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) query year
sqlQuery lId = viewDocuments cId lId t (toDBid NodeDocument) query year
remapNgramsCount (FacetDoc { .. }) =
FacetDoc { facetDoc_ngramCount = Just $ fromIntegral facetDoc_ngramCount
, facetDoc_score = Just $ fromIntegral facetDoc_score
, .. }
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Maybe Text -> Cmd err Int
runCountDocuments :: (HasDBid NodeType, HasNodeError err)
=> CorpusId -> IsTrash -> Maybe Text -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery mYear = do
runCountOpaQuery sqlQuery
listId <- defaultList cId
runCountOpaQuery (sqlQuery listId)
where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) mQuery mYear
sqlQuery lId = viewDocuments cId lId t (toDBid NodeDocument) mQuery mYear
viewDocuments :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocRead
viewDocuments cId t ntId mQuery mYear = proc () -> do
-> ListId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAgg
viewDocuments cId lId t ntId mQuery mYear =
aggregate (pFacetDoc FacetDoc { facetDoc_id = OAgg.groupBy
, facetDoc_created = OAgg.groupBy
, facetDoc_title = OAgg.groupBy
, facetDoc_hyperdata = OAgg.groupBy
, facetDoc_category = OAgg.groupBy
, facetDoc_ngramCount = OAgg.sumInt4
, facetDoc_score = OAgg.sumInt4 })
(viewDocumentsAgg cId lId t ntId mQuery mYear)
viewDocumentsAgg :: CorpusId
-> ListId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAggPart
viewDocumentsAgg cId lId t ntId mQuery mYear = proc () -> do
(c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< ()
-- ngramCountAgg <- aggregate sumInt4 -< cnng
cnng <- optionalRestrict queryContextNodeNgramsTable -<
\cnng' -> (cnng' ^. cnng_node_id) .== pgNodeId lId .&& -- (nc ^. nc_node_id) .&&
(cnng' ^. cnng_context_id) .== (c ^. cs_id)
let ngramCount = fromMaybeFields 0 $ _cnng_doc_count <$> cnng
returnA -< FacetDoc { facetDoc_id = _cs_id c
, facetDoc_created = _cs_date c
, facetDoc_title = _cs_name c
, facetDoc_hyperdata = _cs_hyperdata c
, facetDoc_category = toNullable $ nc^.nc_category
, facetDoc_ngramCount = toNullable $ nc^.nc_score
, facetDoc_score = toNullable $ nc^.nc_score
, facetDoc_category = nc ^. nc_category
, facetDoc_ngramCount = ngramCount
-- NOTE This is a slight abuse of "score" but
-- currently it is all 0's in the DB and the
-- search functionality on the frontend orders
-- by Score.
, facetDoc_score = ngramCount
}
-- TODO Join with context_node_ngrams at context_id/node_id and sum by
......@@ -162,19 +198,12 @@ viewDocumentsQuery :: CorpusId
-> Maybe Text
-> Maybe Text
-> Select (ContextSearchRead, NodeContextRead)
-- -> Select (ContextSearchRead, NodeContextRead, MaybeFields ContextNodeNgramsRead)
viewDocumentsQuery cId t ntId mQuery mYear = proc () -> do
c <- queryContextSearchTable -< ()
-- let joinCond (nc, cnn) = do
-- restrict -< (nc ^. context_id) .== (cnn ^. context_id)
-- restrict -< (nc ^. node_id) .== (cnn ^. node_id) -- :: (NodeContextRead, ContextNodeNgramsRead) -> Field SqlBool
nc <- queryNodeContextTable -< ()
restrict -< (c^.cs_id) .== (nc^.nc_context_id)
restrict -< nc^.nc_node_id .== pgNodeId cId
restrict -< c^.cs_typename .== sqlInt4 ntId
-- cnng <- optionalRestrict queryContextNodeNgramsTable -<
-- (\cnng' -> (nc ^. nc_context_id) .== (cnng' ^. cnng_context_id) .&&
-- (nc ^. nc_node_id) .== (cnng' ^. cnng_node_id))
restrict -< if t then nc^.nc_category .== sqlInt4 0
else nc^.nc_category .>= sqlInt4 1
......@@ -229,6 +258,40 @@ orderWith (Just TagDesc) = descNullsLast facetDoc_category
orderWith _ = asc facetDoc_created
filterWith' :: (SqlOrd date, SqlOrd title, SqlOrd category, SqlOrd score, hyperdata ~ SqlJsonb, SqlOrd ngramCount) =>
Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) (Field ngramCount) (Field score))
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) (Field ngramCount) (Field score))
filterWith' o l order q = limit' l $ offset' o $ orderBy (orderWith' order) q
orderWith' :: (SqlOrd date, SqlOrd title, SqlOrd category, SqlOrd ngramCount, SqlOrd score)
=> Maybe OrderBy
-> Order (Facet id (Field date) (Field title) (Field SqlJsonb) (Field category) (Field ngramCount) (Field score))
orderWith' (Just DateAsc) = asc facetDoc_created
orderWith' (Just DateDesc) = desc facetDoc_created
orderWith' (Just TitleAsc) = asc facetDoc_title
orderWith' (Just TitleDesc) = desc facetDoc_title
orderWith' (Just NgramCountAsc) = asc facetDoc_ngramCount
orderWith' (Just NgramCountDesc) = desc facetDoc_ngramCount
orderWith' (Just ScoreAsc) = asc facetDoc_score
orderWith' (Just ScoreDesc) = desc facetDoc_score
orderWith' (Just SourceAsc) = ascNullsLast facetDoc_source
orderWith' (Just SourceDesc) = descNullsLast facetDoc_source
orderWith' (Just TagAsc) = asc facetDoc_category
orderWith' (Just TagDesc) = desc facetDoc_category
orderWith' _ = asc facetDoc_created
facetDoc_source :: SqlIsJson a
=> Facet id created title (Field a) favorite ngramCount score
-> FieldNullable SqlText
......
......@@ -31,6 +31,7 @@ type Title = Text
-- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) (Maybe Double) (Maybe Score)
type FacetDocAgg' = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) Int64 Int64
-- type FacetSources = FacetDoc
-- type FacetAuthors = FacetDoc
-- type FacetTerms = FacetDoc
......@@ -137,10 +138,27 @@ type FacetDocRead = Facet (Field SqlInt4 )
(FieldNullable SqlFloat8) -- Ngrams Count
(FieldNullable SqlFloat8) -- Score
type FacetDocAgg = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt8) -- Ngrams Count
(Field SqlInt8) -- Score
type FacetDocAggPart = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt4) -- Ngrams Count
(Field SqlInt4) -- Score
-----------------------------------------------------------------------
-----------------------------------------------------------------------
data OrderBy = DateAsc | DateDesc
| TitleAsc | TitleDesc
| NgramCountDesc | NgramCountAsc
| ScoreDesc | ScoreAsc
| SourceAsc | SourceDesc
| TagAsc | TagDesc
......@@ -148,17 +166,19 @@ data OrderBy = DateAsc | DateDesc
instance FromHttpApiData OrderBy
where
parseUrlPiece "DateAsc" = pure DateAsc
parseUrlPiece "DateDesc" = pure DateDesc
parseUrlPiece "TitleAsc" = pure TitleAsc
parseUrlPiece "TitleDesc" = pure TitleDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece "SourceAsc" = pure SourceAsc
parseUrlPiece "SourceDesc" = pure SourceDesc
parseUrlPiece "TagAsc" = pure TagAsc
parseUrlPiece "TagDesc" = pure TagDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
parseUrlPiece "DateAsc" = pure DateAsc
parseUrlPiece "DateDesc" = pure DateDesc
parseUrlPiece "TitleAsc" = pure TitleAsc
parseUrlPiece "TitleDesc" = pure TitleDesc
parseUrlPiece "NgramCountAsc" = pure NgramCountAsc
parseUrlPiece "NgramCountDesc" = pure NgramCountDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece "SourceAsc" = pure SourceAsc
parseUrlPiece "SourceDesc" = pure SourceDesc
parseUrlPiece "TagAsc" = pure TagAsc
parseUrlPiece "TagDesc" = pure TagDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where
toUrlPiece = T.pack . show
......
......@@ -27,5 +27,3 @@ limit' maybeLimit query = maybe query (\l -> limit l query) maybeLimit
offset' :: Maybe Offset -> Select a -> Select a
offset' maybeOffset query = maybe query (\o -> offset o query) maybeOffset
......@@ -168,7 +168,18 @@ getContextsForNgramsTerms cId ngramsTerms = do
query :: PGS.Query
query = [sql| SELECT t.id, t.hash_id, t.typename, t.user_id, t.parent_id, t.name, t.date, t.hyperdata, t.score, t.category
FROM (
SELECT DISTINCT ON (contexts.id) contexts.id AS id, hash_id, typename, user_id, parent_id, name, date, hyperdata, nodes_contexts.score AS score, nodes_contexts.category AS category,context_node_ngrams.doc_count AS doc_count
SELECT DISTINCT ON (contexts.id)
contexts.id AS id,
hash_id,
typename,
user_id,
parent_id,
name,
date,
hyperdata,
nodes_contexts.score AS score,
nodes_contexts.category AS category,
context_node_ngrams.doc_count AS doc_count
FROM contexts
JOIN context_node_ngrams ON contexts.id = context_node_ngrams.context_id
JOIN nodes_contexts ON contexts.id = nodes_contexts.context_id
......
......@@ -65,7 +65,10 @@ extra-deps:
# Databases libs
- git: https://github.com/garganscript/haskell-opaleye.git
commit: 18c4958e076f5f8f82a4e4a3fc9ec659d2bd8766
commit: a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
#- ../haskell-opaleye
# - git: https://github.com/delanoe/hsparql.git
# commit: 2acbbc55ac9bbd4bf1a713c586b8b8e8b82892eb
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment