Commit e2897f33 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[ngrams] implement ngrams term_id to further simplify the patches json

parent ce533895
...@@ -2,22 +2,24 @@ create table public.node_story_archive_history ( ...@@ -2,22 +2,24 @@ create table public.node_story_archive_history (
id SERIAL, id SERIAL,
node_id INTEGER NOT NULL, node_id INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL, ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
patch jsonb DEFAULT '{}'::jsonb NOT NULL, patch jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id), PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
); );
ALTER TABLE public.node_story_archive_history OWNER TO gargantua; ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
INSERT INTO node_story_archive_history (node_id, ngrams_type_id, patch) SELECT t.node_id, t.ngrams_type_id, t.patch FROM -- INSERT INTO node_story_archive_history (node_id, ngrams_type_id, patch) SELECT t.node_id, t.ngrams_type_id, t.patch FROM
( -- (
WITH q AS (SELECT node_id, history.*, row_number() over (ORDER BY node_id) AS sid -- WITH q AS (SELECT node_id, history.*, row_number() over (ORDER BY node_id) AS sid
FROM node_stories, -- FROM node_stories,
jsonb_to_recordset(archive->'history') AS history("Authors" jsonb, "Institutes" jsonb, "NgramsTerms" jsonb, "Sources" jsonb)) -- jsonb_to_recordset(archive->'history') AS history("Authors" jsonb, "Institutes" jsonb, "NgramsTerms" jsonb, "Sources" jsonb))
(SELECT node_id, sid, 1 AS ngrams_type_id, "Authors" AS patch FROM q WHERE "Authors" IS NOT NULL) -- (SELECT node_id, sid, 1 AS ngrams_type_id, "Authors" AS patch FROM q WHERE "Authors" IS NOT NULL)
UNION (SELECT node_id, sid, 2 AS ngrams_type_id, "Institutes" AS patch FROM q WHERE "Institutes" IS NOT NULL) -- UNION (SELECT node_id, sid, 2 AS ngrams_type_id, "Institutes" AS patch FROM q WHERE "Institutes" IS NOT NULL)
UNION (SELECT node_id, sid, 4 AS ngrams_type_id, "NgramsTerms" AS patch FROM q WHERE "NgramsTerms" IS NOT NULL) -- UNION (SELECT node_id, sid, 4 AS ngrams_type_id, "NgramsTerms" AS patch FROM q WHERE "NgramsTerms" IS NOT NULL)
UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL) -- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
ORDER BY node_id, ngrams_type_id, sid -- ORDER BY node_id, ngrams_type_id, sid
) AS t; -- ) AS t;
...@@ -125,19 +125,14 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -125,19 +125,14 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
instance IsHashable NgramsTerm where instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where instance Monoid NgramsTerm where
mempty = NgramsTerm "" mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm instance FromField NgramsTerm
where where
fromField field mb = do fromField field mb = do
...@@ -148,6 +143,9 @@ instance FromField NgramsTerm ...@@ -148,6 +143,9 @@ instance FromField NgramsTerm
$ List.intercalate " " [ "cannot parse hyperdata for JSON: " $ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v , show v
] ]
instance ToField NgramsTerm where
toField (NgramsTerm n) = toField n
data RootParent = RootParent data RootParent = RootParent
{ _rp_root :: NgramsTerm { _rp_root :: NgramsTerm
...@@ -449,13 +447,16 @@ instance ToSchema NgramsPatch where ...@@ -449,13 +447,16 @@ instance ToSchema NgramsPatch where
, ("old", nreSch) , ("old", nreSch)
, ("new", nreSch) , ("new", nreSch)
] ]
instance Arbitrary NgramsPatch where instance Arbitrary NgramsPatch where
arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary)) arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
, (1, NgramsReplace <$> arbitrary <*> arbitrary) , (1, NgramsReplace <$> arbitrary <*> arbitrary)
] ]
instance Serialise NgramsPatch instance Serialise NgramsPatch
instance FromField NgramsPatch where
fromField = fromJSONField
instance ToField NgramsPatch where
toField = toJSONField
instance Serialise (Replace ListType) instance Serialise (Replace ListType)
instance Serialise ListType instance Serialise ListType
...@@ -513,7 +514,6 @@ instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepo ...@@ -513,7 +514,6 @@ instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepo
instance Applicable NgramsPatch (Maybe NgramsRepoElement) where instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
applicable p = applicable (p ^. _NgramsPatch) applicable p = applicable (p ^. _NgramsPatch)
instance Action NgramsPatch (Maybe NgramsRepoElement) where instance Action NgramsPatch (Maybe NgramsRepoElement) where
act p = act (p ^. _NgramsPatch) act p = act (p ^. _NgramsPatch)
...@@ -756,4 +756,3 @@ instance ToSchema UpdateTableNgramsCharts where ...@@ -756,4 +756,3 @@ instance ToSchema UpdateTableNgramsCharts where
------------------------------------------------------------------------ ------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap)) type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
...@@ -89,7 +89,7 @@ import Control.Monad.Reader ...@@ -89,7 +89,7 @@ import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode) import Data.Aeson hiding ((.=), decode)
import Data.ByteString.Char8 (hPutStrLn) import Data.ByteString.Char8 (hPutStrLn)
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Maybe (mapMaybe) import Data.Maybe (catMaybes, mapMaybe)
import Data.Monoid import Data.Monoid
import Data.Pool (Pool, withResource) import Data.Pool (Pool, withResource)
import Data.Semigroup import Data.Semigroup
...@@ -294,21 +294,34 @@ nodeStorySelect = selectTable nodeStoryTable ...@@ -294,21 +294,34 @@ nodeStorySelect = selectTable nodeStoryTable
getNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> IO [NgramsStatePatch'] getNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> IO [NgramsStatePatch']
getNodeArchiveHistory pool nodeId = do getNodeArchiveHistory pool nodeId = do
as <- runPGSQuery pool query (nodeId, True) as <- runPGSQuery pool query (nodeId, True)
let asTuples = mapMaybe (\(ngrams_type_id, patch) -> (\ntId -> (ntId, patch)) <$> (TableNgrams.fromNgramsTypeId ngrams_type_id)) as let asTuples = mapMaybe (\(ngrams_type_id, ngrams, patch) -> (\ntId -> (ntId, ngrams, patch)) <$> (TableNgrams.fromNgramsTypeId ngrams_type_id)) as
pure $ (\(ntId, patch) -> fst $ PM.singleton ntId patch) <$> asTuples pure $ (\(ntId, terms, patch) -> fst $ PM.singleton ntId (NgramsTablePatch $ fst $ PM.singleton terms patch)) <$> asTuples
where where
query :: PGS.Query query :: PGS.Query
query = [sql|SELECT ngrams_type_id, patch FROM node_story_archive_history WHERE node_id = ? AND ? |] query = [sql|SELECT ngrams_type_id, terms, patch
FROM node_story_archive_history
JOIN ngrams ON ngrams.id = ngrams_id
WHERE node_id = ? AND ? |]
insertNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> [NgramsStatePatch'] -> IO () insertNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> [NgramsStatePatch'] -> IO ()
insertNodeArchiveHistory _ _ [] = pure () insertNodeArchiveHistory _ _ [] = pure ()
insertNodeArchiveHistory pool nodeId (h:hs) = do insertNodeArchiveHistory pool nodeId (h:hs) = do
_ <- runPGSExecuteMany pool query $ (\(nType, patch) -> (nodeId, TableNgrams.ngramsTypeId nType, patch)) <$> (PM.toList h) let tuples = mconcat $ (\(nType, (NgramsTablePatch patch)) ->
(\(term, p) ->
(nodeId, TableNgrams.ngramsTypeId nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsTypeId, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nTypeId, term, patch) -> do
ngrams <- runPGSQuery pool ngramsQuery (term, True)
pure $ (\(PGS.Only termId) -> (nId, nTypeId, termId, term, patch)) <$> (headMay ngrams)
) tuples :: IO [Maybe (NodeId, TableNgrams.NgramsTypeId, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany pool query $ ((\(nId, nTypeId, termId, _term, patch) -> (nId, nTypeId, termId, patch)) <$> (catMaybes tuplesM))
_ <- insertNodeArchiveHistory pool nodeId hs _ <- insertNodeArchiveHistory pool nodeId hs
pure () pure ()
where where
ngramsQuery :: PGS.Query
ngramsQuery = [sql| SELECT id FROM ngrams WHERE terms = ? AND ? |]
query :: PGS.Query query :: PGS.Query
query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, patch) VALUES (?, ?, ?) |] query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch) VALUES (?, ?, ?, ?) |]
getNodeStory :: Pool PGS.Connection -> NodeId -> IO NodeListStory getNodeStory :: Pool PGS.Connection -> NodeId -> IO NodeListStory
getNodeStory pool (NodeId nodeId) = do getNodeStory pool (NodeId nodeId) = do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment