[ngrams] some work towards getting the aggregate doc count

parent 021ff32e
...@@ -18,7 +18,8 @@ Portability : POSIX ...@@ -18,7 +18,8 @@ Portability : POSIX
module Gargantext.Database.Query.Facet module Gargantext.Database.Query.Facet
( runViewAuthorsDoc ( runViewAuthorsDoc
, runViewDocuments , runViewDocuments
-- , viewDocuments' , viewDocuments
, viewDocuments'
, runCountDocuments , runCountDocuments
, filterWith , filterWith
...@@ -40,6 +41,7 @@ import Control.Arrow (returnA) ...@@ -40,6 +41,7 @@ import Control.Arrow (returnA)
import Control.Lens ((^.)) import Control.Lens ((^.))
import qualified Data.Text as T import qualified Data.Text as T
import Opaleye import Opaleye
import qualified Opaleye.Aggregate as OAgg
import Protolude hiding (null, map, sum, not) import Protolude hiding (null, map, sum, not)
import qualified Opaleye.Internal.Unpackspec() import qualified Opaleye.Internal.Unpackspec()
...@@ -82,7 +84,7 @@ viewAuthorsDoc cId _ nt = proc () -> do ...@@ -82,7 +84,7 @@ viewAuthorsDoc cId _ nt = proc () -> do
--(doc,(_,(_,(_,contact')))) <- queryAuthorsDoc -< () --(doc,(_,(_,(_,contact')))) <- queryAuthorsDoc -< ()
(doc, _, _, _, contact') <- queryAuthorsDoc -< () (doc, _, _, _, contact') <- queryAuthorsDoc -< ()
restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId restrict -< fromMaybeFields (sqlInt4 $ -1) (_node_id <$> contact') .=== pgNodeId cId
restrict -< _node_typename doc .== sqlInt4 (toDBid nt) restrict -< _node_typename doc .== sqlInt4 (toDBid nt)
returnA -< FacetDoc { facetDoc_id = _node_id doc returnA -< FacetDoc { facetDoc_id = _node_id doc
...@@ -125,15 +127,20 @@ runViewDocuments :: HasDBid NodeType ...@@ -125,15 +127,20 @@ runViewDocuments :: HasDBid NodeType
-> Cmd err [FacetDoc] -> Cmd err [FacetDoc]
runViewDocuments cId t o l order query year = do runViewDocuments cId t o l order query year = do
printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
runOpaQuery $ filterWith o l order sqlQuery res <- runOpaQuery $ filterWith' o l order sqlQuery :: Cmd err [FacetDocAgg']
pure $ remapNgramsCount <$> res
where where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) query year sqlQuery = viewDocuments' cId t (toDBid NodeDocument) query year
remapNgramsCount (FacetDoc { .. }) =
FacetDoc { facetDoc_ngramCount = Just $ fromIntegral facetDoc_ngramCount
, .. }
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Maybe Text -> Cmd err Int runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery mYear = do runCountDocuments cId t mQuery mYear = do
runCountOpaQuery sqlQuery runCountOpaQuery sqlQuery
where where
sqlQuery = viewDocuments cId t (toDBid NodeDocument) mQuery mYear sqlQuery = viewDocuments' cId t (toDBid NodeDocument) mQuery mYear
viewDocuments :: CorpusId viewDocuments :: CorpusId
...@@ -144,16 +151,66 @@ viewDocuments :: CorpusId ...@@ -144,16 +151,66 @@ viewDocuments :: CorpusId
-> Select FacetDocRead -> Select FacetDocRead
viewDocuments cId t ntId mQuery mYear = proc () -> do viewDocuments cId t ntId mQuery mYear = proc () -> do
(c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< () (c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< ()
-- ngramCountAgg <- aggregate sumInt4 -< cnng --ngramCountAgg <- laterally . aggregate sumInt4 -< cnng
-- TODO PGInt8 -> PGFloat conversion
-- https://github.com/tomjaguarpaw/haskell-opaleye/issues/401
-- unsafeCast "float8"
-- cnng <- optionalRestrict queryContextNodeNgramsTable -<
-- \cnng' -> (cnng' ^. cnng_node_id) .== pgNodeId cId .&&
-- (cnng' ^. cnng_context_id) .== (c ^. cs_id)
-- cnt <- aggregate sum -< maybeFields (sqlInt4 0) _cnng_doc_count cnng
-- (proc cnng' -> do
-- returnA -< maybeFields (sqlInt4 0) _cnng_doc_count cnng') -< cnng
returnA -< FacetDoc { facetDoc_id = _cs_id c returnA -< FacetDoc { facetDoc_id = _cs_id c
, facetDoc_created = _cs_date c , facetDoc_created = _cs_date c
, facetDoc_title = _cs_name c , facetDoc_title = _cs_name c
, facetDoc_hyperdata = _cs_hyperdata c , facetDoc_hyperdata = _cs_hyperdata c
, facetDoc_category = toNullable $ nc^.nc_category , facetDoc_category = toNullable $ nc^.nc_category
, facetDoc_ngramCount = toNullable $ nc^.nc_score , facetDoc_ngramCount = toNullable $ nc^.nc_score
-- , facetDoc_ngramCount = toNullable $ toFields cnt
, facetDoc_score = toNullable $ nc^.nc_score , facetDoc_score = toNullable $ nc^.nc_score
} }
viewDocuments' :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAgg
viewDocuments' cId t ntId mQuery mYear =
aggregate (pFacetDoc FacetDoc { facetDoc_id = OAgg.groupBy
, facetDoc_created = OAgg.groupBy
, facetDoc_title = OAgg.groupBy
, facetDoc_hyperdata = OAgg.groupBy
, facetDoc_category = OAgg.groupBy
, facetDoc_ngramCount = OAgg.sumInt4
, facetDoc_score = OAgg.sum })
(viewDocumentsAgg cId t ntId mQuery mYear)
viewDocumentsAgg :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Maybe Text
-> Select FacetDocAggPart
viewDocumentsAgg cId t ntId mQuery mYear = proc () -> do
(c, nc) <- viewDocumentsQuery cId t ntId mQuery mYear -< ()
cnng <- optionalRestrict queryContextNodeNgramsTable -<
\cnng' -> (cnng' ^. cnng_node_id) .== (nc ^. nc_node_id) .&&
(cnng' ^. cnng_context_id) .== (c ^. cs_id)
returnA -< FacetDoc { facetDoc_id = _cs_id c
, facetDoc_created = _cs_date c
, facetDoc_title = _cs_name c
, facetDoc_hyperdata = _cs_hyperdata c
, facetDoc_category = nc ^. nc_category
, facetDoc_ngramCount = fromMaybeFields 0 $ _cnng_doc_count <$> cnng -- toNullable $ nc^.nc_score
-- , facetDoc_ngramCount = toNullable $ toFields cnt
, facetDoc_score = nc ^. nc_score
}
-- TODO Join with context_node_ngrams at context_id/node_id and sum by -- TODO Join with context_node_ngrams at context_id/node_id and sum by
-- doc_count. -- doc_count.
viewDocumentsQuery :: CorpusId viewDocumentsQuery :: CorpusId
...@@ -229,6 +286,37 @@ orderWith (Just TagDesc) = descNullsLast facetDoc_category ...@@ -229,6 +286,37 @@ orderWith (Just TagDesc) = descNullsLast facetDoc_category
orderWith _ = asc facetDoc_created orderWith _ = asc facetDoc_created
filterWith' :: (SqlOrd date, SqlOrd title, SqlOrd category, SqlOrd score, hyperdata ~ SqlJsonb) =>
Maybe Gargantext.Core.Types.Offset
-> Maybe Gargantext.Core.Types.Limit
-> Maybe OrderBy
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) ngramCount (Field score))
-> Select (Facet id (Field date) (Field title) (Field hyperdata) (Field category) ngramCount (Field score))
filterWith' o l order q = limit' l $ offset' o $ orderBy (orderWith' order) q
orderWith' :: (SqlOrd b1, SqlOrd b2, SqlOrd b3, SqlOrd b4)
=> Maybe OrderBy
-> Order (Facet id (Field b1) (Field b2) (Field SqlJsonb) (Field b3) ngramCount (Field b4))
orderWith' (Just DateAsc) = asc facetDoc_created
orderWith' (Just DateDesc) = desc facetDoc_created
orderWith' (Just TitleAsc) = asc facetDoc_title
orderWith' (Just TitleDesc) = desc facetDoc_title
orderWith' (Just ScoreAsc) = asc facetDoc_score
orderWith' (Just ScoreDesc) = desc facetDoc_score
orderWith' (Just SourceAsc) = ascNullsLast facetDoc_source
orderWith' (Just SourceDesc) = descNullsLast facetDoc_source
orderWith' (Just TagAsc) = asc facetDoc_category
orderWith' (Just TagDesc) = desc facetDoc_category
orderWith' _ = asc facetDoc_created
facetDoc_source :: SqlIsJson a facetDoc_source :: SqlIsJson a
=> Facet id created title (Field a) favorite ngramCount score => Facet id created title (Field a) favorite ngramCount score
-> FieldNullable SqlText -> FieldNullable SqlText
......
...@@ -31,6 +31,7 @@ type Title = Text ...@@ -31,6 +31,7 @@ type Title = Text
-- TODO remove Title -- TODO remove Title
type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) (Maybe Double) (Maybe Score) type FacetDoc = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) (Maybe Double) (Maybe Score)
type FacetDocAgg' = Facet NodeId UTCTime Title HyperdataDocument (Maybe Category) Int64 (Maybe Score)
-- type FacetSources = FacetDoc -- type FacetSources = FacetDoc
-- type FacetAuthors = FacetDoc -- type FacetAuthors = FacetDoc
-- type FacetTerms = FacetDoc -- type FacetTerms = FacetDoc
...@@ -137,6 +138,22 @@ type FacetDocRead = Facet (Field SqlInt4 ) ...@@ -137,6 +138,22 @@ type FacetDocRead = Facet (Field SqlInt4 )
(FieldNullable SqlFloat8) -- Ngrams Count (FieldNullable SqlFloat8) -- Ngrams Count
(FieldNullable SqlFloat8) -- Score (FieldNullable SqlFloat8) -- Score
type FacetDocAgg = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt8) -- Ngrams Count
(Field SqlFloat8) -- Score
type FacetDocAggPart = Facet (Field SqlInt4 )
(Field SqlTimestamptz)
(Field SqlText )
(Field SqlJsonb )
(Field SqlInt4) -- Category
(Field SqlInt4) -- Ngrams Count
(Field SqlFloat8) -- Score
----------------------------------------------------------------------- -----------------------------------------------------------------------
----------------------------------------------------------------------- -----------------------------------------------------------------------
data OrderBy = DateAsc | DateDesc data OrderBy = DateAsc | DateDesc
......
...@@ -64,8 +64,9 @@ extra-deps: ...@@ -64,8 +64,9 @@ extra-deps:
commit: fd7e5d7325939103cd87d0dc592faf644160341c commit: fd7e5d7325939103cd87d0dc592faf644160341c
# Databases libs # Databases libs
- git: https://github.com/garganscript/haskell-opaleye.git #- git: https://github.com/garganscript/haskell-opaleye.git
commit: 18c4958e076f5f8f82a4e4a3fc9ec659d2bd8766 # commit: 912641cddd98bbdd315f5614b4098dc2460064ee
- ../haskell-opaleye
- git: https://github.com/delanoe/hsparql.git - git: https://github.com/delanoe/hsparql.git
commit: 2acbbc55ac9bbd4bf1a713c586b8b8e8b82892eb commit: 2acbbc55ac9bbd4bf1a713c586b8b8e8b82892eb
- git: https://github.com/robstewart57/rdf4h.git - git: https://github.com/robstewart57/rdf4h.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment