Commit 5d0a03bc authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-ngrams-table' into dev

[MERGE] fix merge
[NgramsTable] fix order and bugs of groups
parents e99e4e05 9187d327
Pipeline #128 failed with stage
...@@ -39,9 +39,10 @@ import Data.Monoid ...@@ -39,9 +39,10 @@ import Data.Monoid
--import Data.Semigroup --import Data.Semigroup
import Data.Set (Set) import Data.Set (Set)
import qualified Data.Set as Set import qualified Data.Set as Set
--import Data.Maybe (catMaybes) import Data.Maybe (isJust)
import Data.Tuple.Extra (first)
-- import qualified Data.Map.Strict as DM -- import qualified Data.Map.Strict as DM
import Data.Map.Strict (Map) import Data.Map.Strict (Map, mapKeys, fromListWith)
--import qualified Data.Set as Set --import qualified Data.Set as Set
import Control.Lens (makeLenses, Prism', prism', (^..), (.~), (#), to, withIndex, folded, ifolded) import Control.Lens (makeLenses, Prism', prism', (^..), (.~), (#), to, withIndex, folded, ifolded)
import Control.Monad (guard) import Control.Monad (guard)
...@@ -57,10 +58,9 @@ import GHC.Generics (Generic) ...@@ -57,10 +58,9 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Types.Node (NodeType(..)) import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Schema.Node (defaultList, HasNodeError) import Gargantext.Database.Schema.Node (defaultList, HasNodeError)
import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId) import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId, NgramsTableData'(..))
import qualified Gargantext.Database.Schema.Ngrams as Ngrams import qualified Gargantext.Database.Schema.Ngrams as Ngrams
import Gargantext.Database.Schema.NodeNgram import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNgramsNgrams
import Gargantext.Database.Utils (Cmd) import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Types (ListType(..), ListTypeId, ListId, CorpusId, Limit, Offset, listTypeId) import Gargantext.Core.Types (ListType(..), ListTypeId, ListId, CorpusId, Limit, Offset, listTypeId)
...@@ -119,6 +119,29 @@ instance Arbitrary NgramsElement where ...@@ -119,6 +119,29 @@ instance Arbitrary NgramsElement where
newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] } newtype NgramsTable = NgramsTable { _ngramsTable :: [NgramsElement] }
deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show) deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
-- | TODO Check N and Weight
toNgramsElement :: [NgramsTableData'] -> [NgramsElement]
toNgramsElement ns = map toNgramsElement' ns
where
toNgramsElement' (NgramsTableData' _ p t _ lt w) = NgramsElement t lt' (round w) p' c'
where
p' = case p of
Nothing -> Nothing
Just x -> lookup x mapParent
c' = maybe mempty identity $ lookup t mapChildren
lt' = maybe (panic "API.Ngrams: listypeId") identity lt
mapParent :: Map Int Text
mapParent = fromListWith (<>) $ map (\(NgramsTableData' i _ t _ _ _) -> (i,t)) ns
mapChildren :: Map Text (Set Text)
mapChildren = mapKeys (\i -> (maybe (panic "API.Ngrams.mapChildren: ParentId with no Terms: Impossible") identity $ lookup i mapParent))
$ fromListWith (<>)
$ map (first fromJust)
$ filter (isJust . fst)
$ map (\(NgramsTableData' _ p t _ _ _) -> (p, Set.singleton t)) ns
instance Arbitrary NgramsTable where instance Arbitrary NgramsTable where
arbitrary = elements arbitrary = elements
[ NgramsTable [ NgramsTable
...@@ -278,10 +301,11 @@ mkListsUpdate nt patches = ...@@ -278,10 +301,11 @@ mkListsUpdate nt patches =
] ]
mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm) mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
-> NgramsType
-> NgramsTablePatch -> NgramsTablePatch
-> [(NgramsParent, NgramsChild, Maybe Double)] -> [(NgramsTypeId, NgramsParent, NgramsChild)]
mkChildrenGroups addOrRem patches = mkChildrenGroups addOrRem nt patches =
[ (parent, child, Just 1) [ (ngramsTypeId nt, parent, child)
| (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
, child <- patch ^.. patch_children . to addOrRem . folded , child <- patch ^.. patch_children . to addOrRem . folded
] ]
...@@ -289,14 +313,14 @@ mkChildrenGroups addOrRem patches = ...@@ -289,14 +313,14 @@ mkChildrenGroups addOrRem patches =
ngramsTypeFromTabType :: Maybe TabType -> NgramsType ngramsTypeFromTabType :: Maybe TabType -> NgramsType
ngramsTypeFromTabType maybeTabType = ngramsTypeFromTabType maybeTabType =
let lieu = "Garg.API.Ngrams: " :: Text in let lieu = "Garg.API.Ngrams: " :: Text in
case maybeTabType of case maybeTabType of
Nothing -> Ngrams.Sources -- panic (lieu <> "Indicate the Table") Nothing -> panic (lieu <> "Indicate the Table")
Just tab -> case tab of Just tab -> case tab of
Sources -> Ngrams.Sources Sources -> Ngrams.Sources
Authors -> Ngrams.Authors Authors -> Ngrams.Authors
Institutes -> Ngrams.Institutes Institutes -> Ngrams.Institutes
Terms -> Ngrams.NgramsTerms Terms -> Ngrams.NgramsTerms
_ -> panic $ lieu <> "No Ngrams for this tab" _ -> panic $ lieu <> "No Ngrams for this tab"
-- Apply the given patch to the DB and returns the patch to be applied on the -- Apply the given patch to the DB and returns the patch to be applied on the
...@@ -315,8 +339,8 @@ tableNgramsPatch corpusId maybeTabType maybeList (Versioned version patch) = do ...@@ -315,8 +339,8 @@ tableNgramsPatch corpusId maybeTabType maybeList (Versioned version patch) = do
updateNodeNgrams $ NodeNgramsUpdate updateNodeNgrams $ NodeNgramsUpdate
{ _nnu_user_list_id = listId { _nnu_user_list_id = listId
, _nnu_lists_update = mkListsUpdate ngramsType patch , _nnu_lists_update = mkListsUpdate ngramsType patch
, _nnu_rem_children = mkChildrenGroups _rem patch , _nnu_rem_children = mkChildrenGroups _rem ngramsType patch
, _nnu_add_children = mkChildrenGroups _add patch , _nnu_add_children = mkChildrenGroups _add ngramsType patch
} }
pure $ Versioned 1 emptyNgramsTablePatch pure $ Versioned 1 emptyNgramsTablePatch
...@@ -327,7 +351,6 @@ getTableNgrams :: HasNodeError err ...@@ -327,7 +351,6 @@ getTableNgrams :: HasNodeError err
-> Maybe ListId -> Maybe Limit -> Maybe Offset -> Maybe ListId -> Maybe Limit -> Maybe Offset
-> Cmd err (Versioned NgramsTable) -> Cmd err (Versioned NgramsTable)
getTableNgrams cId maybeTabType maybeListId mlimit moffset = do getTableNgrams cId maybeTabType maybeListId mlimit moffset = do
let lieu = "Garg.API.Ngrams: " :: Text
let ngramsType = ngramsTypeFromTabType maybeTabType let ngramsType = ngramsTypeFromTabType maybeTabType
listId <- maybe (defaultList cId) pure maybeListId listId <- maybe (defaultList cId) pure maybeListId
...@@ -336,18 +359,9 @@ getTableNgrams cId maybeTabType maybeListId mlimit moffset = do ...@@ -336,18 +359,9 @@ getTableNgrams cId maybeTabType maybeListId mlimit moffset = do
limit_ = maybe defaultLimit identity mlimit limit_ = maybe defaultLimit identity mlimit
offset_ = maybe 0 identity moffset offset_ = maybe 0 identity moffset
(ngramsTableDatas, mapToParent, mapToChildren) <- ngramsTableDatas <-
Ngrams.getNgramsTableDb NodeDocument ngramsType (Ngrams.NgramsTableParam listId cId) limit_ offset_ Ngrams.getNgramsTableDb NodeDocument ngramsType (Ngrams.NgramsTableParam listId cId) limit_ offset_
-- printDebug "ngramsTableDatas" ngramsTableDatas -- printDebug "ngramsTableDatas" ngramsTableDatas
pure $ Versioned 1 $ pure $ Versioned 1 $ NgramsTable (toNgramsElement ngramsTableDatas)
NgramsTable $ map (\(Ngrams.NgramsTableData ngs _ lt w) ->
NgramsElement ngs
(maybe (panic $ lieu <> "listType") identity lt)
(round w)
(lookup ngs mapToParent)
(maybe mempty identity $ lookup ngs mapToChildren)
) ngramsTableDatas
...@@ -315,7 +315,7 @@ ngrams2list m = ...@@ -315,7 +315,7 @@ ngrams2list m =
-- | TODO: weight of the list could be a probability -- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int
insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1 insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
| (l,(ngt, ng)) <- lngs | (l,(ngt, ng)) <- lngs
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -54,9 +54,10 @@ data DocumentIdWithNgrams a = ...@@ -54,9 +54,10 @@ data DocumentIdWithNgrams a =
, document_ngrams :: Map (NgramsT Ngrams) Int , document_ngrams :: Map (NgramsT Ngrams) Int
} deriving (Show) } deriving (Show)
-- | TODO for now, list Type is CandidateList, why ? -- | TODO for now, list Type is CandidateList because Graph Terms
-- have to be detected in next step in the flow
insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) (ngramsTypeId t) (listTypeId CandidateList) (fromIntegral i) insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) Nothing (ngramsTypeId t) (listTypeId CandidateList) (fromIntegral i)
| (ng, t2n2i) <- DM.toList m | (ng, t2n2i) <- DM.toList m
, (t, n2i) <- DM.toList t2n2i , (t, n2i) <- DM.toList t2n2i
, (n, i) <- DM.toList n2i , (n, i) <- DM.toList n2i
......
This diff is collapsed.
This diff is collapsed.
...@@ -34,21 +34,14 @@ module Gargantext.Database.Schema.NodeNgramsNgrams ...@@ -34,21 +34,14 @@ module Gargantext.Database.Schema.NodeNgramsNgrams
import Control.Lens (view) import Control.Lens (view)
import Control.Lens.TH (makeLensesWith, abbreviatedFields) import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Monad (void)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Maybe (Maybe) import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Gargantext.Database.Utils (Cmd, runOpaQuery, connection)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection, formatPGSQuery)
import Gargantext.Database.Types.Node (ListId) import Gargantext.Database.Types.Node (ListId)
import Gargantext.Database.Schema.Node (pgNodeId) import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Prelude import Gargantext.Prelude
import Opaleye import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight = data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
NodeNgramsNgrams { _nng_NodeId :: node_id NodeNgramsNgrams { _nng_NodeId :: node_id
...@@ -122,65 +115,3 @@ insertNodeNgramsNgramsW ns = do ...@@ -122,65 +115,3 @@ insertNodeNgramsNgramsW ns = do
c <- view connection c <- view connection
liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
------------------------------------------------------------------------
data Action = Del | Add
type NgramsParent = Text
type NgramsChild = Text
ngramsGroup :: Action -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)]
-> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup action listId ngs = trace (show ngs) $ runNodeNgramsNgrams q listId ngs
where
q = case action of
Del -> queryDelNodeNgramsNgrams
Add -> queryInsertNodeNgramsNgrams
runNodeNgramsNgrams :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
runNodeNgramsNgrams q listId ngs = void $ execPGSQuery q (listId, Values fields ngs')
where
ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
fields = map (\t -> QualifiedIdentifier Nothing t)
["int4","text","text","float8"]
runNodeNgramsNgramsDebug :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ByteString
runNodeNgramsNgramsDebug q listId ngs = formatPGSQuery q (listId, Values fields ngs')
where
ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
fields = map (\t -> QualifiedIdentifier Nothing t)
["int4","text","text","float8"]
--------------------------------------------------------------------
-- TODO: on conflict update weight
queryInsertNodeNgramsNgrams :: PGS.Query
queryInsertNodeNgramsNgrams = [sql|
WITH nId AS ?
WITH input_rows(ng1,ng2,w) AS (?)
INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
JOIN ngrams ngrams1 ON ngrams1.terms = ng1
JOIN ngrams ngrams2 ON ngrams2.terms = ng2
ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
|]
queryDelNodeNgramsNgrams :: PGS.Query
queryDelNodeNgramsNgrams = [sql|
WITH nId AS ?
WITH input(ng1,ng2,w) AS (?)
DELETE FROM nodes_ngrams_ngrams AS nnn
USING ngrams AS ngrams1,
ngrams AS ngrams2,
input AS input
WHERE
ngrams1.terms = input.ng1
AND ngrams2.terms = input.ng2
AND nnn.node_id = input.nId
AND nnn.ngram1_id = ngrams1.id
AND nnn.ngram2_id = ngrams2.id
;
|]
...@@ -54,12 +54,14 @@ CREATE TABLE public.nodes_ngrams ( ...@@ -54,12 +54,14 @@ CREATE TABLE public.nodes_ngrams (
id SERIAL, id SERIAL,
node_id integer NOT NULL, node_id integer NOT NULL,
ngrams_id integer NOT NULL, ngrams_id integer NOT NULL,
parent_id integer REFERENCES public.nodes_ngrams(id) ON DELETE SET NULL,
ngrams_type integer, ngrams_type integer,
list_type integer, list_type integer,
weight double precision, weight double precision,
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE, FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE, FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
PRIMARY KEY (node_id,ngrams_id) PRIMARY KEY (id)
-- PRIMARY KEY (node_id,ngrams_id)
); );
ALTER TABLE public.nodes_ngrams OWNER TO gargantua; ALTER TABLE public.nodes_ngrams OWNER TO gargantua;
...@@ -103,10 +105,10 @@ CREATE UNIQUE INDEX nodes_expr_idx2 ON public.nodes USING btree (((hyperdata ->> ...@@ -103,10 +105,10 @@ CREATE UNIQUE INDEX nodes_expr_idx2 ON public.nodes USING btree (((hyperdata ->>
CREATE UNIQUE INDEX nodes_typename_parent_id_expr_idx ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text))); CREATE UNIQUE INDEX nodes_typename_parent_id_expr_idx ON public.nodes USING btree (typename, parent_id, ((hyperdata ->> 'uniqId'::text)));
CREATE INDEX nodes_user_id_typename_parent_id_idx ON public.nodes USING btree (user_id, typename, parent_id); CREATE INDEX nodes_user_id_typename_parent_id_idx ON public.nodes USING btree (user_id, typename, parent_id);
CREATE UNIQUE INDEX ON public.ngrams(terms); CREATE UNIQUE INDEX ON public.ngrams(terms);
--CREATE UNIQUE INDEX ON public.ngrams(terms,n); --CREATE UNIQUE INDEX ON public.ngrams(terms,n);
CREATE UNIQUE INDEX ON public.nodes_ngrams USING btree (node_id,ngrams_id);
CREATE INDEX nodes_ngrams_ngrams_id_idx ON public.nodes_ngrams USING btree (ngrams_id); CREATE INDEX nodes_ngrams_ngrams_id_idx ON public.nodes_ngrams USING btree (ngrams_id);
CREATE INDEX nodes_ngrams_ngrams_node_id_idx ON public.nodes_ngrams_ngrams USING btree (node_id); CREATE INDEX nodes_ngrams_ngrams_node_id_idx ON public.nodes_ngrams_ngrams USING btree (node_id);
CREATE UNIQUE INDEX ON public.nodes_ngrams USING btree (node_id,ngrams_id,ngrams_type); CREATE UNIQUE INDEX ON public.nodes_ngrams USING btree (node_id,ngrams_id,ngrams_type);
......
...@@ -86,33 +86,24 @@ data DbTreeNode = DbTreeNode { dt_nodeId :: NodeId ...@@ -86,33 +86,24 @@ data DbTreeNode = DbTreeNode { dt_nodeId :: NodeId
dbTree :: RootId -> Cmd err [DbTreeNode] dbTree :: RootId -> Cmd err [DbTreeNode]
dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGSQuery [sql| dbTree rootId = map (\(nId, tId, pId, n) -> DbTreeNode nId tId pId n) <$> runPGSQuery [sql|
WITH RECURSIVE WITH RECURSIVE
-- starting node(s) tree (id, typename, parent_id, name) AS
starting (id, typename, parent_id, name) AS
( (
SELECT n.id, n.typename, n.parent_id, n.name SELECT p.id, p.typename, p.parent_id, p.name
FROM nodes AS n FROM nodes AS p
WHERE n.parent_id = ? -- this can be arbitrary WHERE p.id = ?
),
descendants (id, typename, parent_id, name) AS UNION
(
SELECT id, typename, parent_id, name SELECT c.id, c.typename, c.parent_id, c.name
FROM starting FROM nodes AS c
UNION ALL
SELECT n.id, n.typename, n.parent_id, n.name INNER JOIN tree AS s ON c.parent_id = s.id
FROM nodes AS n JOIN descendants AS d ON n.parent_id = d.id WHERE c.typename IN (2,3,30,31,5,7,9)
where n.typename in (2,3,30,31,5,7,9)
),
ancestors (id, typename, parent_id, name) AS
(
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n
WHERE n.id IN (SELECT parent_id FROM starting)
UNION ALL
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n JOIN ancestors AS a ON n.id = a.parent_id
) )
TABLE ancestors SELECT * from tree;
UNION ALL
TABLE descendants ;
|] (Only rootId) |] (Only rootId)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment