Commit d4a2e775 authored by Alexandre Delanoë

[OPTIM] Ngrams Table scores

parent df2a6dfe
......@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByContext
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
-- import Debug.Trace (trace)
import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Query.Table.Ngrams (selectNgramsId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
......@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = -- trace (show (cId, lId)) $
HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms
-- Resolve the requested terms to their ngrams ids, fetch the weights by id,
-- then map every id back to its term and sum the rounded weights per term.
-- Ids that are absent from the lookup map are dropped.
getOccByNgramsOnlyFast' cId lId nt tms = do
  idToTerm <- selectNgramsId (map unNgramsTerm tms)
  weighted <- run cId lId nt (Map.keys idToTerm)
  let occOf (nId, w) = case Map.lookup nId idToTerm of
        Nothing -> Nothing
        Just t  -> Just (NgramsTerm t, round w)
  pure $ HM.fromListWith (+) (catMaybes (map occOf weighted))
fields = [QualifiedIdentifier Nothing "text"]
run :: CorpusId
-> ListId
-> NgramsType
-> [NgramsTerm]
-> Cmd err [(NgramsTerm, Double)]
run cId' lId' nt' tms' = map (first NgramsTerm) <$> runPGSQuery query
( Values fields ((DPS.Only . unNgramsTerm) <$> tms')
-> [NgramsId]
-> Cmd err [(NgramsId, Double)]
run cId' lId' nt' tms' = runPGSQuery query
( Values fields ((DPS.Only) <$> tms')
, cId'
, lId'
, ngramsTypeId nt'
fields = [QualifiedIdentifier Nothing "int4"]
query :: DPS.Query
query = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN ngrams ng ON nng.ngrams_id =
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ? -- ListId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- Not trash
GROUP BY ng.terms, nng.weight
WITH input_ngrams(id) AS (?)
SELECT, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_ngrams ngi ON nng.ngrams_id =
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nc.category > 0
GROUP BY, nng.weight
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
......@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
hPutStrLn stderr q'
throw (SomeException e)
-- | Fold over the rows of a query with @consume@, starting from
--   @initialState@. Any exception raised by the fold is handed to
--   @printError@, which writes to stderr and rethrows it.
--
-- NOTE(review): on the defining line below, 'PGS.fold_' is applied to
-- @conn@, @initialState@ and @consume@ only; @q@ is not passed.
-- postgresql-simple's @fold_@ expects the 'PGS.Query' right after the
-- connection — confirm against the real file.
-- NOTE(review): 'PGS.formatQuery' is called with @c@ and @q@ but no
-- parameter value; the library signature also takes the query parameters
-- — confirm.
-- NOTE(review): the closing parenthesis of the constraint tuple and the
-- @where@ introducing @printError@ are not visible in this view.
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn initialState consume) (printError conn)
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-- | TODO catch error
runPGSQuery_ :: ( CmdM env err m
, PGS.FromRow r
......@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
, queryNgramsTable
, selectNgramsByDoc
, insertNgrams
, selectNgramsId
import Control.Lens ((^.))
import Data.ByteString.Internal (ByteString)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Text (Text)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Database.PostgreSQL.Simple as PGS
import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd, formatPGSQuery, runPGSQuery)
import Gargantext.Database.Query.Join (leftJoin3)
import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Database.PostgreSQL.Simple as PGS
-- | Plain 'Select' over 'ngramsTable', built with 'selectTable' and no
--   further restriction.
queryNgramsTable :: Select NgramsRead
queryNgramsTable = selectTable ngramsTable
......@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
-- | Look up the given terms and return a map from ngrams id to term,
--   built from the rows returned by 'selectNgramsId''.
--   An empty input short-circuits to an empty map without running a query.
selectNgramsId :: [Text] -> Cmd err (Map NgramsId Text)
selectNgramsId [] = pure Map.empty
selectNgramsId ns = toIdMap <$> selectNgramsId' ns
  where
    -- Unpack each 'Indexed' row into an (id, term) pair.
    toIdMap rows = Map.fromList [ (i, t) | Indexed i t <- rows ]
-- | Raw query behind 'selectNgramsId': passes @ns@ to
--   'querySelectNgramsId' as a single 'Values' parameter whose one column
--   is typed @text@, and returns the rows as 'Indexed' pairs.
-- NOTE(review): the closing parenthesis of the parameter and the @where@
-- introducing @fields@ are not visible in this view — confirm against the
-- real file.
selectNgramsId' :: [Text] -> Cmd err [Indexed Int Text]
selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
$ Values fields ns
-- Column type list for the 'Values' parameter: a single @text@ column.
fields = map (\t -> QualifiedIdentifier Nothing t) ["text"]
-- | SQL used by 'selectNgramsId'': joins the @ngrams@ table against the
--   @input_rows(terms)@ CTE, which is filled from the single @?@ parameter.
-- NOTE(review): the SELECT and GROUP BY column lists look truncated in
-- this view (dangling commas), and the closing quasi-quote bracket is not
-- visible — presumably an id column was lost; verify against the real file.
querySelectNgramsId :: PGS.Query
querySelectNgramsId = [sql|
WITH input_rows(terms) AS (?)
SELECT, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms,
......@@ -11,8 +11,9 @@ Ngrams connection to the Database.
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
......@@ -173,6 +174,9 @@ instance FromField Ngrams where
x <- fromField fld mdata
pure $ text2ngrams x
-- | A bare 'Text' is rendered as a row with exactly one field.
-- NOTE(review): this looks like an orphan instance (the module is compiled
-- with @-fno-warn-orphans@); presumably it exists so that a list of 'Text'
-- can be used as query rows — confirm against the callers.
instance PGS.ToRow Text where
  toRow = pure . toField
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
