Commit e2897f33 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[ngrams] implement ngrams term_id to further simplify the patches json

parent ce533895
Pipeline #3073 failed with stage
in 72 minutes and 54 seconds
......@@ -2,22 +2,24 @@ create table public.node_story_archive_history (
id SERIAL,
node_id INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
patch jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
INSERT INTO node_story_archive_history (node_id, ngrams_type_id, patch) SELECT t.node_id, t.ngrams_type_id, t.patch FROM
(
WITH q AS (SELECT node_id, history.*, row_number() over (ORDER BY node_id) AS sid
FROM node_stories,
jsonb_to_recordset(archive->'history') AS history("Authors" jsonb, "Institutes" jsonb, "NgramsTerms" jsonb, "Sources" jsonb))
-- INSERT INTO node_story_archive_history (node_id, ngrams_type_id, patch) SELECT t.node_id, t.ngrams_type_id, t.patch FROM
-- (
-- WITH q AS (SELECT node_id, history.*, row_number() over (ORDER BY node_id) AS sid
-- FROM node_stories,
-- jsonb_to_recordset(archive->'history') AS history("Authors" jsonb, "Institutes" jsonb, "NgramsTerms" jsonb, "Sources" jsonb))
(SELECT node_id, sid, 1 AS ngrams_type_id, "Authors" AS patch FROM q WHERE "Authors" IS NOT NULL)
UNION (SELECT node_id, sid, 2 AS ngrams_type_id, "Institutes" AS patch FROM q WHERE "Institutes" IS NOT NULL)
UNION (SELECT node_id, sid, 4 AS ngrams_type_id, "NgramsTerms" AS patch FROM q WHERE "NgramsTerms" IS NOT NULL)
UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
ORDER BY node_id, ngrams_type_id, sid
) AS t;
-- (SELECT node_id, sid, 1 AS ngrams_type_id, "Authors" AS patch FROM q WHERE "Authors" IS NOT NULL)
-- UNION (SELECT node_id, sid, 2 AS ngrams_type_id, "Institutes" AS patch FROM q WHERE "Institutes" IS NOT NULL)
-- UNION (SELECT node_id, sid, 4 AS ngrams_type_id, "NgramsTerms" AS patch FROM q WHERE "NgramsTerms" IS NOT NULL)
-- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
-- ORDER BY node_id, ngrams_type_id, sid
-- ) AS t;
......@@ -125,19 +125,14 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
mempty = NgramsTerm ""
instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm
where
fromField field mb = do
......@@ -148,6 +143,9 @@ instance FromField NgramsTerm
$ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
instance ToField NgramsTerm where
toField (NgramsTerm n) = toField n
data RootParent = RootParent
{ _rp_root :: NgramsTerm
......@@ -449,13 +447,16 @@ instance ToSchema NgramsPatch where
, ("old", nreSch)
, ("new", nreSch)
]
instance Arbitrary NgramsPatch where
arbitrary = frequency [ (9, NgramsPatch <$> arbitrary <*> (replace <$> arbitrary <*> arbitrary))
, (1, NgramsReplace <$> arbitrary <*> arbitrary)
]
instance Serialise NgramsPatch
instance FromField NgramsPatch where
fromField = fromJSONField
instance ToField NgramsPatch where
toField = toJSONField
instance Serialise (Replace ListType)
instance Serialise ListType
......@@ -513,7 +514,6 @@ instance Action (PairPatch (PatchMSet NgramsTerm) (Replace ListType)) NgramsRepo
instance Applicable NgramsPatch (Maybe NgramsRepoElement) where
applicable p = applicable (p ^. _NgramsPatch)
instance Action NgramsPatch (Maybe NgramsRepoElement) where
act p = act (p ^. _NgramsPatch)
......@@ -756,4 +756,3 @@ instance ToSchema UpdateTableNgramsCharts where
------------------------------------------------------------------------
type NgramsList = (Map TableNgrams.NgramsType (Versioned NgramsTableMap))
......@@ -89,7 +89,7 @@ import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode)
import Data.ByteString.Char8 (hPutStrLn)
import Data.Map.Strict (Map)
import Data.Maybe (mapMaybe)
import Data.Maybe (catMaybes, mapMaybe)
import Data.Monoid
import Data.Pool (Pool, withResource)
import Data.Semigroup
......@@ -294,21 +294,34 @@ nodeStorySelect = selectTable nodeStoryTable
getNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> IO [NgramsStatePatch']
getNodeArchiveHistory pool nodeId = do
as <- runPGSQuery pool query (nodeId, True)
let asTuples = mapMaybe (\(ngrams_type_id, patch) -> (\ntId -> (ntId, patch)) <$> (TableNgrams.fromNgramsTypeId ngrams_type_id)) as
pure $ (\(ntId, patch) -> fst $ PM.singleton ntId patch) <$> asTuples
let asTuples = mapMaybe (\(ngrams_type_id, ngrams, patch) -> (\ntId -> (ntId, ngrams, patch)) <$> (TableNgrams.fromNgramsTypeId ngrams_type_id)) as
pure $ (\(ntId, terms, patch) -> fst $ PM.singleton ntId (NgramsTablePatch $ fst $ PM.singleton terms patch)) <$> asTuples
where
query :: PGS.Query
query = [sql|SELECT ngrams_type_id, patch FROM node_story_archive_history WHERE node_id = ? AND ? |]
query = [sql|SELECT ngrams_type_id, terms, patch
FROM node_story_archive_history
JOIN ngrams ON ngrams.id = ngrams_id
WHERE node_id = ? AND ? |]
insertNodeArchiveHistory :: Pool PGS.Connection -> NodeId -> [NgramsStatePatch'] -> IO ()
insertNodeArchiveHistory _ _ [] = pure ()
insertNodeArchiveHistory pool nodeId (h:hs) = do
_ <- runPGSExecuteMany pool query $ (\(nType, patch) -> (nodeId, TableNgrams.ngramsTypeId nType, patch)) <$> (PM.toList h)
let tuples = mconcat $ (\(nType, (NgramsTablePatch patch)) ->
(\(term, p) ->
(nodeId, TableNgrams.ngramsTypeId nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsTypeId, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nTypeId, term, patch) -> do
ngrams <- runPGSQuery pool ngramsQuery (term, True)
pure $ (\(PGS.Only termId) -> (nId, nTypeId, termId, term, patch)) <$> (headMay ngrams)
) tuples :: IO [Maybe (NodeId, TableNgrams.NgramsTypeId, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany pool query $ ((\(nId, nTypeId, termId, _term, patch) -> (nId, nTypeId, termId, patch)) <$> (catMaybes tuplesM))
_ <- insertNodeArchiveHistory pool nodeId hs
pure ()
where
ngramsQuery :: PGS.Query
ngramsQuery = [sql| SELECT id FROM ngrams WHERE terms = ? AND ? |]
query :: PGS.Query
query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, patch) VALUES (?, ?, ?) |]
query = [sql| INSERT INTO node_story_archive_history(node_id, ngrams_type_id, ngrams_id, patch) VALUES (?, ?, ?, ?) |]
getNodeStory :: Pool PGS.Connection -> NodeId -> IO NodeListStory
getNodeStory pool (NodeId nodeId) = do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment