Commit d4a2e775 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[OPTIM] Ngrams Table scores

parent df2a6dfe
...@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics. ...@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByContext module Gargantext.Database.Action.Metrics.NgramsByContext
where where
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap) import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
-- import Debug.Trace (trace)
import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Query.Table.Ngrams (selectNgramsId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId ...@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = -- trace (show (cId, lId)) $ getOccByNgramsOnlyFast' cId lId nt tms = do -- trace (show (cId, lId)) $
HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms mapNgramsIds <- selectNgramsId $ map unNgramsTerm tms
HM.fromListWith (+) <$> catMaybes
<$> map (\(nId, s) -> (,) <$> (NgramsTerm <$> (Map.lookup nId mapNgramsIds)) <*> (Just $ round s) )
<$> run cId lId nt (Map.keys mapNgramsIds)
where where
fields = [QualifiedIdentifier Nothing "text"]
run :: CorpusId run :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsId]
-> Cmd err [(NgramsTerm, Double)] -> Cmd err [(NgramsId, Double)]
run cId' lId' nt' tms' = map (first NgramsTerm) <$> runPGSQuery query run cId' lId' nt' tms' = runPGSQuery query
( Values fields ((DPS.Only . unNgramsTerm) <$> tms') ( Values fields ((DPS.Only) <$> tms')
, cId' , cId'
, lId' , lId'
, ngramsTypeId nt' , ngramsTypeId nt'
) )
fields = [QualifiedIdentifier Nothing "int4"]
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
WITH input_rows(terms) AS (?) WITH input_ngrams(id) AS (?)
SELECT ng.terms, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id
WHERE nng.node1_id = ? -- CorpusId WHERE nng.node1_id = ?
AND nng.node2_id = ? -- ListId AND nng.node2_id = ?
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ?
AND nc.category > 0 -- Not trash AND nc.category > 0
GROUP BY ng.terms, nng.weight GROUP BY ngi.id, nng.weight
|] |]
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
......
...@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn) ...@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
hPutStrLn stderr q' hPutStrLn stderr q'
throw (SomeException e) throw (SomeException e)
{-
-- TODO
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn initialState consume) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-}
-- | TODO catch error -- | TODO catch error
runPGSQuery_ :: ( CmdM env err m runPGSQuery_ :: ( CmdM env err m
, PGS.FromRow r , PGS.FromRow r
......
...@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams ...@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
, queryNgramsTable , queryNgramsTable
, selectNgramsByDoc , selectNgramsByDoc
, insertNgrams , insertNgrams
, selectNgramsId
) )
where where
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.ByteString.Internal (ByteString) import Data.ByteString.Internal (ByteString)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Database.PostgreSQL.Simple as PGS
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd, formatPGSQuery, runPGSQuery) import Gargantext.Database.Prelude (runOpaQuery, Cmd, formatPGSQuery, runPGSQuery)
import Gargantext.Database.Query.Join (leftJoin3) import Gargantext.Database.Query.Join (leftJoin3)
import Gargantext.Database.Query.Table.ContextNodeNgrams2 import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNgrams import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Database.PostgreSQL.Simple as PGS
queryNgramsTable :: Select NgramsRead queryNgramsTable :: Select NgramsRead
queryNgramsTable = selectTable ngramsTable queryNgramsTable = selectTable ngramsTable
...@@ -106,3 +108,28 @@ queryInsertNgrams = [sql| ...@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
FROM input_rows FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index JOIN ngrams c USING (terms); -- columns of unique index
|] |]
--------------------------------------------------------------------------
selectNgramsId :: [Text] -> Cmd err (Map NgramsId Text)
selectNgramsId ns =
if List.null ns
then pure Map.empty
else Map.fromList <$> map (\(Indexed i t) -> (i, t)) <$> (selectNgramsId' ns)
selectNgramsId' :: [Text] -> Cmd err [Indexed Int Text]
selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
$ Values fields ns
)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text"]
querySelectNgramsId :: PGS.Query
querySelectNgramsId = [sql|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
...@@ -11,8 +11,9 @@ Ngrams connection to the Database. ...@@ -11,8 +11,9 @@ Ngrams connection to the Database.
-} -}
{-# LANGUAGE Arrows #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
...@@ -173,6 +174,9 @@ instance FromField Ngrams where ...@@ -173,6 +174,9 @@ instance FromField Ngrams where
x <- fromField fld mdata x <- fromField fld mdata
pure $ text2ngrams x pure $ text2ngrams x
instance PGS.ToRow Text where
toRow t = [toField t]
text2ngrams :: Text -> Ngrams text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt' text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
where where
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment