[ngrams] some work towards getting the aggregate doc count

parent 021ff32e
Pipeline #3673 failed with stage
in 18 minutes and 20 seconds
......@@ -18,7 +18,8 @@ Portability : POSIX
module Gargantext.Database.Query.Facet
( runViewAuthorsDoc
, runViewDocuments
-- , viewDocuments'
, viewDocuments
, viewDocuments'
, runCountDocuments
, filterWith
......@@ -40,6 +41,7 @@ import Control.Arrow (returnA)
import Control.Lens ((^.))
import qualified Data.Text as T
import Opaleye
import qualified Opaleye.Aggregate as OAgg
import Protolude hiding (null, map, sum, not)
import qualified Opaleye.Internal.Unpackspec()
......@@ -82,7 +84,7 @@ viewAuthorsDoc cId _ nt = proc () -> do
--(doc,(_,(_,(_,contact')))) <- queryAuthorsDoc -< ()
(doc, _, _, _, contact') <- queryAuthorsDoc -< ()
restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId
restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId
restrict -< _node_typename doc .== sqlInt4 (toDBid nt)
returnA -< FacetDoc { facetDoc_id = _node_id doc
......@@ -125,15 +127,20 @@ runViewDocuments :: HasDBid NodeType
-> Cmd err [FacetDoc]
runViewDocuments cId t o l order query year = do
printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
runOpaQuery $ filterWith o l order sqlQuery
res <- runOpaQuery $ filterWith' o l order sqlQuery :: Cmd err [FacetDocAgg']
pure $ remapNgramsCount <$> res
where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) query year
sqlQuery = viewDocuments' cId t (toDBid NodeDocument) query year
remapNgramsCount (FacetDoc { .. }) =
FacetDoc { facetDoc_ngramCount = Just $ fromIntegral facetDoc_ngramCount
, .. }
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery mYear = do
runCountOpaQuery sqlQuery
where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) mQuery mYear
sqlQuery = viewDocuments' cId t (toDBid NodeDocument) mQuery mYear
viewDocuments :: CorpusId
......@@ -144,16 +151,66 @@ viewDocuments :: CorpusId
-> Select FacetDocRead
viewDocuments cId t ntId mQuery mYear = proc () -> do
(c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< ()
-- ngramCountAgg <- aggregate sumInt4 -< cnng
--ngramCountAgg <- laterally . aggregate sumInt4 -< cnng
-- TODO PGInt8 -> PGFloat conversion
-- https://github.com/tomjaguarpaw/haskell-opaleye/issues/401
-- unsafeCast "float8"
-- cnng <- optionalRestrict queryContextNodeNgramsTable -<
-- \cnng' -> (cnng' ^. cnng_node_id) .== pgNodeId cId .&&
-- (cnng' ^. cnng_context_id) .== (c ^. cs_id)
-- cnt <- aggregate sum -< maybeFields (sqlInt4 0) _cnng_doc_count cnng
-- (proc cnng' -> do
-- returnA -< maybeFields (sqlInt4 0) _cnng_doc_count cnng') -< cnng
returnA -< FacetDoc { facetDoc_id = _cs_id c
, facetDoc_created = _cs_date c
, facetDoc_title = _cs_name c
, facetDoc_hyperdata = _cs_hyperdata c
, facetDoc_category = toNullable $ nc^.nc_category
, facetDoc_ngramCount = toNullable $ nc^.nc_score
-- , facetDoc_ngramCount = toNullable $ toFields cnt
, facetDoc_score = toNullable $ nc^.nc_score
}
viewDocuments' :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAgg
viewDocuments' cId t ntId mQuery mYear =
aggregate (pFacetDoc FacetDoc { facetDoc_id = OAgg.groupBy
, facetDoc_created = OAgg.groupBy
, facetDoc_title = OAgg.groupBy
, facetDoc_hyperdata = OAgg.groupBy
, facetDoc_category = OAgg.groupBy
, facetDoc_ngramCount = OAgg.sumInt4
, facetDoc_score = OAgg.sum })
(viewDocumentsAgg cId t ntId mQuery mYear)
viewDocumentsAgg :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAggPart
viewDocumentsAgg cId t ntId mQuery mYear = proc () -> do
(c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< ()
cnng <- optionalRestrict queryContextNodeNgramsTable -<
\cnng' -> (cnng' ^. cnng_node_id) .== (nc ^. nc_node_id) .&&
(cnng' ^. cnng_context_id) .== (c ^. cs_id)
returnA -< FacetDoc { facetDoc_id = _cs_id c
, facetDoc_created = _cs_date c
, facetDoc_title = _cs_name c
, facetDoc_hyperdata = _cs_hyperdata c
, facetDoc_category = nc ^. nc_category
, facetDoc_ngramCount = fromMaybeFields 0 $ _cnng_doc_count <$> cnng -- toNullable $ nc^.nc_score
-- , facetDoc_ngramCount = toNullable $ toFields cnt
, facetDoc_score = nc ^. nc_score
}
-- TODO Join with context_node_ngrams at context_id/node_id and sum by
-- doc_count.
viewDocumentsQuery :: CorpusId
......@@ -229,6 +286,37 @@ orderWith (Just TagDesc) = descNullsLast facetDoc_category
orderWith _ = asc facetDoc_created
filterWith' :: (SqlOrd date, SqlOrd title, SqlOrd category, SqlOrd score, hyperdata ~ SqlJsonb) =>
Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) ngramCount (Field score))
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) ngramCount (Field score))
filterWith' o l order q = limit' l $ offset' o $ orderBy (orderWith' order) q
orderWith' :: (SqlOrd b1, SqlOrd b2, SqlOrd b3, SqlOrd b4)
=> Maybe OrderBy
-> Order (Facet id (Field b1) (Field b2) (Field SqlJsonb) (Field b3) ngramCount (Field b4))
orderWith' (Just DateAsc) = asc facetDoc_created
orderWith' (Just DateDesc) = desc facetDoc_created
orderWith' (Just TitleAsc) = asc facetDoc_title
orderWith' (Just TitleDesc) = desc facetDoc_title
orderWith' (Just ScoreAsc) = asc facetDoc_score
orderWith' (Just ScoreDesc) = desc facetDoc_score
orderWith' (Just SourceAsc) = ascNullsLast facetDoc_source
orderWith' (Just SourceDesc) = descNullsLast facetDoc_source
orderWith' (Just TagAsc) = asc facetDoc_category
orderWith' (Just TagDesc) = desc facetDoc_category
orderWith' _ = asc facetDoc_created
facetDoc_source :: SqlIsJson a
=> Facet id created title (Field a) favorite ngramCount score
-> FieldNullable SqlText
......
......@@ -31,6 +31,7 @@ type Title = Text
-- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) (Maybe Double) (Maybe Score)
type FacetDocAgg' = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) Int64 (Maybe Score)
-- type FacetSources = FacetDoc
-- type FacetAuthors = FacetDoc
-- type FacetTerms = FacetDoc
......@@ -137,6 +138,22 @@ type FacetDocRead = Facet (Field SqlInt4 )
(FieldNullable SqlFloat8) -- Ngrams Count
(FieldNullable SqlFloat8) -- Score
type FacetDocAgg = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt8) -- Ngrams Count
(Field SqlFloat8) -- Score
type FacetDocAggPart = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt4) -- Ngrams Count
(Field SqlFloat8) -- Score
-----------------------------------------------------------------------
-----------------------------------------------------------------------
data OrderBy = DateAsc | DateDesc
......
......@@ -64,8 +64,9 @@ extra-deps:
commit: fd7e5d7325939103cd87d0dc592faf644160341c
# Databases libs
- git: https://github.com/garganscript/haskell-opaleye.git
commit: 18c4958e076f5f8f82a4e4a3fc9ec659d2bd8766
#- git: https://github.com/garganscript/haskell-opaleye.git
# commit: 912641cddd98bbdd315f5614b4098dc2460064ee
- ../haskell-opaleye
- git: https://github.com/delanoe/hsparql.git
commit: 2acbbc55ac9bbd4bf1a713c586b8b8e8b82892eb
- git: https://github.com/robstewart57/rdf4h.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment