Commit a2788ade authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[nodeStory] optimize node_stories table

Read in backend works now, insert and upsert not yet.
parent f43b4bb1
Pipeline #3118 failed with stage
in 0 seconds
-- we will migrate data here
-- rename old table and create a new one
ALTER TABLE public.node_stories RENAME TO node_stories_old;
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
-- Authors (ngrams_type_id = 1), see G.D.S.Ngrams.hs -> ngramsTypeId
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 1, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
JOIN ngrams ON terms = j.key;
-- we will leave children for later, small steps
-- INSERT INTO public.node_stories
-- (node_id, version, ngrams_type_id, ngrams_id, children, ngrams_repo_element)
-- SELECT node_id, (archive->'version')::int, 1, ngrams.id, c.children, (j.value - 'children')
-- FROM node_stories_old
-- CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
-- CROSS JOIN LATERAL (SELECT array_agg(d.elem) AS children FROM jsonb_array_elements_text(j.value->'children') AS d(elem)) AS c
-- JOIN ngrams ON terms = j.key;
-- Institutes (ngrams_type_id = 2)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 2, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Institutes') AS j
JOIN ngrams ON terms = j.key;
-- Sources (ngrams_type_id = 3)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 3, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Sources') AS j
JOIN ngrams ON terms = j.key;
-- NgramsTerms (ngrams_type_id = 4)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 4, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'NgramsTerms') AS j
JOIN ngrams ON terms = j.key;
......@@ -36,4 +36,3 @@ ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
-- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
-- ORDER BY node_id, ngrams_type_id, sid
-- ) AS t;
......@@ -96,7 +96,6 @@ library
Gargantext.Core.Viz.Phylo.SynchronicClustering
Gargantext.Core.Viz.Types
other-modules:
ConcurrentTest
Gargantext.API.Admin.Auth
Gargantext.API.Admin.FrontEnd
Gargantext.API.Admin.Orchestrator
......
......@@ -29,6 +29,7 @@ module Gargantext.API.Ngrams
, TableNgramsApiPut
, getTableNgrams
, getTableNgramsCorpus
, setListNgrams
--, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
......
......@@ -22,6 +22,7 @@ import Data.Hashable (Hashable)
import Data.Set (Set)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
import Gargantext.Database.Schema.Ngrams (NgramsType)
......@@ -29,7 +30,6 @@ import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Gargantext.Core.NodeStory
import qualified Gargantext.Core.NodeStoryFile as NSF
......@@ -209,7 +209,7 @@ migrateFromDirToDb = do
_ <- mapM (\(nId, a) -> do
n <- liftBase $ nodeExists pool nId
case n of
False -> pure 0
False -> pure ()
True -> liftBase $ upsertNodeArchive pool nId a
) $ Map.toList nls
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
......
......@@ -28,7 +28,7 @@ import Data.String (IsString, fromString)
import Data.Swagger hiding (version, patch)
import Data.Text (Text, pack, strip)
import Data.Validity
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField, ResultError(ConversionFailed), returnError)
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import GHC.Generics (Generic)
import Gargantext.Core.Text (size)
......@@ -44,7 +44,6 @@ import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as Set
......@@ -124,7 +123,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData, FromField, ToField)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
......@@ -133,18 +132,6 @@ instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm
where
fromField field mb = do
v <- fromField field mb
case fromJSON v of
Success a -> pure $ NgramsTerm $ strip a
Error _err -> returnError ConversionFailed field
$ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
instance ToField NgramsTerm where
toField (NgramsTerm n) = toField n
data RootParent = RootParent
......@@ -164,19 +151,20 @@ data NgramsRepoElement = NgramsRepoElement
, _nre_children :: !(MSet NgramsTerm)
}
deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
-- TODO
-- if ngrams & not size => size
-- drop occurrences
makeLenses ''NgramsRepoElement
instance ToSchema NgramsRepoElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
instance Serialise NgramsRepoElement
instance FromField NgramsRepoElement where
fromField = fromJSONField
instance ToField NgramsRepoElement where
toField = toJSONField
instance Serialise (MSet NgramsTerm)
instance Serialise NgramsRepoElement
data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm
......
This diff is collapsed.
......@@ -27,7 +27,7 @@ import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diag
import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory
import Gargantext.Core.NodeStory hiding (runPGSQuery)
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
......@@ -88,7 +88,7 @@ updateNgramsOccurrences cId mlId = do
updateNgramsOccurrences' :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
-> m [Int]
updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
......@@ -97,7 +97,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
Just lId' -> pure lId'
result <- getNgramsOccurrences cId lId tabType maybeLimit
let
toInsert :: [[Action]]
toInsert = map (\(ngramsTerm, score)
......@@ -121,7 +121,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
......@@ -163,7 +163,7 @@ updateContextScore cId maybeListId = do
Just lId' -> pure lId'
result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
let
toInsert :: [[Action]]
toInsert = map (\(contextId, score)
......@@ -185,7 +185,7 @@ updateContextScore cId maybeListId = do
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
......@@ -243,6 +243,3 @@ getNgrams lId tabType = do
take' :: Maybe Int -> [a] -> [a]
take' Nothing xs = xs
take' (Just n) xs = take n xs
......@@ -113,4 +113,3 @@ nodeTypes = [ (n, toDBid n) | n <- allNodeTypes ]
fromNodeTypeId :: NodeTypeId -> NodeType
fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv)
......@@ -30,8 +30,8 @@ import Data.Morpheus.Types (GQLType)
import Data.Swagger
import Data.Text (Text, unpack, pack)
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.ToField (ToField, toField)
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Schema.Context
......@@ -357,6 +357,20 @@ data NodeType = NodeUser
deriving (Show, Read, Eq, Generic, Bounded, Enum)
instance GQLType NodeType
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
instance FromField NodeType where
fromField = fromJSONField
instance ToField NodeType where
toField = toJSONField
allNodeTypes :: [NodeType]
......@@ -394,21 +408,6 @@ defaultName NodeFrameNotebook = "Code"
defaultName NodeFile = "File"
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
------------------------------------------------------------------------
-- Instances
......@@ -451,4 +450,3 @@ instance DefaultFromField SqlText (Maybe Hash)
context2node :: Context a -> Node a
context2node (Context ci ch ct cu cp cn cd chy) = Node ci ch ct cu cp cn cd chy
......@@ -132,4 +132,3 @@ selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
......@@ -26,7 +26,6 @@ import Control.Lens (set, view)
import Data.Aeson
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Opaleye hiding (FromField)
import Prelude hiding (null, id, map, sum)
......@@ -41,6 +40,9 @@ import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head)
import qualified Database.PostgreSQL.Simple as PGS
queryNodeSearchTable :: Select NodeSearchRead
queryNodeSearchTable = selectTable nodeTableSearch
......@@ -123,7 +125,7 @@ getClosestParentIdByType :: HasDBid NodeType
-> NodeType
-> Cmd err (Maybe NodeId)
getClosestParentIdByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
case result of
[(NodeId parentId, pTypename)] -> do
if toDBid nType == pTypename then
......@@ -132,12 +134,12 @@ getClosestParentIdByType nId nType = do
getClosestParentIdByType (NodeId parentId) nType
_ -> pure Nothing
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n2.id, n2.typename
FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?;
WHERE n1.id = ?;
|]
-- | Similar to `getClosestParentIdByType` but includes current node
......@@ -147,7 +149,7 @@ getClosestParentIdByType' :: HasDBid NodeType
-> NodeType
-> Cmd err (Maybe NodeId)
getClosestParentIdByType' nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
case result of
[(NodeId id, pTypename)] -> do
if toDBid nType == pTypename then
......@@ -156,11 +158,11 @@ getClosestParentIdByType' nId nType = do
getClosestParentIdByType nId nType
_ -> pure Nothing
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n.id, n.typename
FROM nodes n
WHERE n.id = ? AND 0 = ?;
WHERE n.id = ?;
|]
-- | Given a node id, find all it's children (no matter how deep) of
......@@ -170,15 +172,15 @@ getChildrenByType :: HasDBid NodeType
-> NodeType
-> Cmd err [NodeId]
getChildrenByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n.id, n.typename
FROM nodes n
WHERE n.parent_id = ? AND 0 = ?;
WHERE n.parent_id = ?;
|]
------------------------------------------------------------------------
......@@ -231,8 +233,8 @@ selectNodesIdWithType nt = proc () -> do
------------------------------------------------------------------------
nodeExists :: (HasNodeError err) => NodeId -> Cmd err Bool
nodeExists nId = (== [DPS.Only True])
<$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? AND ?|] (nId, True)
nodeExists nId = (== [PGS.Only True])
<$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? |] (PGS.Only nId)
getNode :: HasNodeError err => NodeId -> Cmd err (Node Value)
getNode nId = do
......@@ -397,4 +399,3 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
......@@ -20,25 +20,27 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams
where
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Control.Monad (mzero)
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.Map (fromList, lookup)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Data.Text (Text, splitOn, pack, strip)
import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS
import Text.Read (read)
import qualified Data.ByteString.Char8 as B
import qualified Data.HashMap.Strict as HashMap
import qualified Database.PostgreSQL.Simple as PGS
type NgramsId = Int
......@@ -82,8 +84,34 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- map NgramsType to its assigned id
instance FromField NgramsType where
fromField fld mdata =
case B.unpack `fmap` mdata of
Nothing -> returnError UnexpectedNull fld ""
Just dat -> do
n <- fromField fld mdata
if (n :: Int) > 0 then
case fromNgramsTypeId (NgramsTypeId n) of
Nothing -> returnError ConversionFailed fld dat
Just nt -> pure nt
else
returnError ConversionFailed fld dat
instance ToField NgramsType where
toField nt = toField $ ngramsTypeId nt
ngramsTypes :: [NgramsType]
......@@ -96,33 +124,13 @@ instance ToSchema NgramsType
newtype NgramsTypeId = NgramsTypeId Int
deriving (Eq, Show, Ord, Num)
instance ToField NgramsTypeId where
toField (NgramsTypeId n) = toField n
instance FromField NgramsTypeId where
fromField fld mdata = do
n <- fromField fld mdata
if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
where
defaultFromField = fromPGSFromField
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment