Commit 94a53969 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[NodeNodeNgram] First Node is now NodeListId

parent cb76a5bb
......@@ -910,7 +910,7 @@ getTableNgramsDoc :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
-> Maybe Text -- full text search
-> m (Versioned NgramsTable)
getTableNgramsDoc cId dId tabType listId limit_ offset listType minSize maxSize _mt = do
ns <- selectNodesWithUsername NodeCorpus userMaster
ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [cId]) dId ngramsType
let searchQuery = flip S.member (S.fromList ngs)
......@@ -958,7 +958,9 @@ getTableNgrams nId tabType listId limit_ offset
-- getNgramsTableMap ({-lists <>-} listIds) ngramsType
table <- getNgramsTableMap listId ngramsType & mapped . v_data %~ finalize
occurrences <- getOccByNgramsOnlySafe nId ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
lIds <- selectNodesWithUsername NodeList userMaster
occurrences <- getOccByNgramsOnlySafe nId (lIds <> [listId]) ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
......@@ -183,7 +183,9 @@ insertMasterDocs c lang hs = do
terms2id <- insertNgrams $ DM.keys maps
let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps
_ <- insertDocNgrams masterCorpusId indexedNgrams
lId <- getOrMkList masterCorpusId masterUserId
_ <- insertDocNgrams lId indexedNgrams
pure $ map reId ids
......
......@@ -22,9 +22,10 @@ import Data.Map (Map)
import Data.Text (Text)
import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit)
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus)
import Gargantext.Database.Flow (getOrMkRootWithCorpus)
......@@ -51,7 +52,10 @@ getMetrics cId maybeListId tabType maybeLimit = do
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
metrics' <- getTficfWith cId masterCorpusId (ngramsTypeFromTabType tabType) ngs'
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
......@@ -79,10 +83,13 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
let
take' Nothing xs = xs
take' (Just n) xs = take n xs
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (ngramsTypeFromTabType tabType)
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ Map.keys ngs)
pure $ (ngs', ngs, myCooc)
......
{-|
Module : Gargantext.Database.Metrics.Count
Description : Ngram connection to the Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Count Ngrams by Context
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Metrics.Count where
{-
import Control.Arrow (returnA)
import Control.Lens (view)
import Data.Map.Strict (Map, fromListWith, elems)
import Data.Monoid (mempty)
import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement)
import Gargantext.Core.Types.Main (listTypeId, ListType(..))
import Gargantext.Database.Access
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Queries.Join (leftJoin4, leftJoin3)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsType(..), ngramsTypeId, Ngrams(..), NgramsIndexed(..), ngrams, ngramsTerms, fromNgramsTypeId)
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Node (HasNodeError(..))
import Gargantext.Database.Schema.NodeNgram
import Gargantext.Database.Schema.NodeNode
--import Gargantext.Database.Schema.NodeNodeNgrams
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude hiding (sum)
import Gargantext.Text.Metrics.Count (Coocs, coocOn)
import Opaleye
import Safe (headMay)
import qualified Database.PostgreSQL.Simple as PGS
getNgramsByNode :: NodeId -> NgramsType -> Cmd err [[Text]]
getNgramsByNode nId nt = elems
<$> fromListWith (<>)
<$> map (\(i,t) -> (i,[t]))
<$> getNgramsByNodeNodeIndexed nId nt
-- | TODO add join with nodeNodeNgram (if it exists)
getNgramsByNodeNodeIndexed :: NodeId -> NgramsType -> Cmd err [(NodeId, Text)]
getNgramsByNodeNodeIndexed nId nt = runOpaQuery (select' nId nt)
where
select' nId' nt' = proc () -> do
(ng,(nng,(nn,n))) <- getNgramsByNodeNodeIndexedJoin -< ()
restrict -< _node_id n .== toNullable (pgNodeId nId')
restrict -< nng_ngramsType nng .== toNullable (pgNgramsTypeId $ ngramsTypeId nt')
restrict -< nn_delete nn ./= (toNullable . pgBool) True
returnA -< (nng_node_id nng, ngrams_terms ng)
getNgramsByNodeNodeIndexedJoin :: Query ( NgramsRead
, (NodeNgramReadNull
, (NodeNodeReadNull
, NodeReadNull
)
)
)
getNgramsByNodeNodeIndexedJoin = leftJoin4 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
c1 c2 c3
where
c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
c1 (nn,n) = nn_node1_id nn .== _node_id n
c2 :: ( NodeNgramRead
, (NodeNodeRead
, NodeReadNull
)
) -> Column PGBool
c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
c3 :: ( NgramsRead
, ( NodeNgramRead
, ( NodeNodeReadNull
, NodeReadNull
)
)
) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
{-
getNgramsByNodeNodeIndexedJoin5 :: Query ( NodeNodesNgramsRead
, (NgramsReadNull
, (NodeNgramReadNull
, (NodeNodeReadNull
, NodeReadNull
)
)
)
)
getNgramsByNodeNodeIndexedJoin5 = leftJoin5 queryNodeTable
queryNodeNodeTable
queryNodeNgramTable
queryNgramsTable
queryNodeNodeNgramsTable
c1 c2 c3 c4
where
c1 :: (NodeNodeRead, NodeRead) -> Column PGBool
c1 (nn,n) = nn_node1_id nn .== _node_id n
c2 :: ( NodeNgramRead
, (NodeNodeRead
, NodeReadNull
)
) -> Column PGBool
c2 (nng,(nn',_)) = (nng_node_id nng) .== nn_node2_id nn'
c3 :: ( NgramsRead
, ( NodeNgramRead
, ( NodeNodeReadNull
, NodeReadNull
)
)
) -> Column PGBool
c3 (ng,(nng',(_,_))) = (ngrams_id ng) .== nng_ngrams_id nng'
c4 :: ( NodeNodeNgramsRead
, (NgramsRead
, ( NodeNgramReadNull
, ( NodeNodeReadNull
, NodeReadNull
)
)
)
) -> Column PGBool
c4 (nnng,(_,(_,(nn,_)))) = (toNullable $ nnng_node1_id nnng) .== (nn_node1_id nn)
.&& (toNullable $ nnng_node2_id nnng) .== (nn_node2_id nn)
--}
--{-
getNgramsElementsWithParentNodeId :: NodeId -> Cmd err (Map NgramsType [NgramsElement])
getNgramsElementsWithParentNodeId nId = do
ns <- getNgramsWithParentNodeId nId
pure $ fromListWith (<>)
[ (maybe (panic "error") identity $ fromNgramsTypeId nt,
[mkNgramsElement ng CandidateTerm Nothing mempty])
| (_,(nt,ng)) <- ns
]
-------------------------------------------------------------------------
getNgramsWithParentNodeId :: NodeId -> Cmd err [(NodeId, (NgramsTypeId, Text))]
getNgramsWithParentNodeId nId = runOpaQuery (select nId)
where
select nId' = proc () -> do
(ng,(nng,n)) <- getNgramsWithParentNodeIdJoin -< ()
restrict -< _node_parentId n .== (toNullable $ pgNodeId nId')
restrict -< _node_typename n .== (toNullable $ pgInt4 $ nodeTypeId NodeDocument)
returnA -< (nng_node_id nng, (nng_ngramsType nng, ngrams_terms ng))
--}
getNgramsWithParentNodeIdJoin :: Query ( NgramsRead
, ( NodeNgramReadNull
, NodeReadNull
)
)
getNgramsWithParentNodeIdJoin = leftJoin3 queryNodeTable queryNodeNgramTable queryNgramsTable on1 on2
where
on1 :: (NodeNgramRead, NodeRead) -> Column PGBool
on1 (nng,n) = nng_node_id nng .== _node_id n
on2 :: (NgramsRead, (NodeNgramRead, NodeReadNull))-> Column PGBool
on2 (ng, (nng,_)) = ngrams_id ng .== nng_ngrams_id nng
countCorpusDocuments :: Roles -> Int -> Cmd err Int
countCorpusDocuments r cId = maybe 0 identity
<$> headMay
<$> map (\(PGS.Only n) -> n)
<$> runQuery' r cId
where
runQuery' RoleUser cId' = runPGSQuery
"SELECT count(*) from nodes_nodes nn WHERE nn.node1_id = ? AND nn.delete = False"
(PGS.Only cId')
runQuery' RoleMaster cId' = runPGSQuery
"SELECT count(*) from nodes n WHERE n.parent_id = ? AND n.typename = ?"
(cId', nodeTypeId NodeDocument)
-}
......@@ -69,11 +69,11 @@ getTficf' u m nt f = do
(countNodesByNgramsWith f m')
--{-
getTficfWith :: UserCorpusId -> MasterCorpusId
getTficfWith :: UserCorpusId -> MasterCorpusId -> [ListId]
-> NgramsType -> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m nt mtxt = do
u' <- getNodesByNgramsOnlyUser u nt (Map.keys mtxt)
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m
let f x = case Map.lookup x mtxt of
......@@ -162,17 +162,17 @@ getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnlySlow :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySlow cId nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs
getOccByNgramsOnlySlow cId ls nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId ls nt ngs
getOccByNgramsOnlySafe :: CorpusId -> NgramsType -> [Text]
getOccByNgramsOnlySafe :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId nt ngs = do
getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow cId nt ngs
slow <- getOccByNgramsOnlySlow cId ls nt ngs
when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference" (diff slow fast :: PatchMap Text (Replace (Maybe Int)))
pure slow
......@@ -209,17 +209,18 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text]
getNodesByNgramsOnlyUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs = Map.unionsWith (<>)
getNodesByNgramsOnlyUser cId ls nt ngs = Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId nt) (splitEvery 1000 ngs)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
selectNgramsOnlyByNodeUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId nt tms =
selectNgramsOnlyByNodeUser cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
......@@ -230,10 +231,12 @@ selectNgramsOnlyByNodeUser cId nt tms =
queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?)
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
......@@ -243,6 +246,10 @@ queryNgramsOnlyByNodeUser = [sql|
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
-- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId))
......
......@@ -9,9 +9,10 @@ Portability : POSIX
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Node.Select where
......@@ -24,20 +25,17 @@ import Gargantext.Database.Schema.User
import Gargantext.Core.Types.Individu (Username)
import Control.Arrow (returnA)
--{-
selectNodesWithUsername :: NodeType -> Username -> Cmd err [NodeId]
selectNodesWithUsername nt u = runOpaQuery (q u)
where
q u' = proc () -> do
(n,usrs) <- join -< ()
restrict -< user_username usrs .== (toNullable $ pgStrictText u')
restrict -< _node_typename n .== (pgInt4 $ nodeTypeId nt)
returnA -< _node_id n
join :: Query (NodeRead, UserReadNull)
join = leftJoin queryNodeTable queryUserTable on1
where
on1 (n,us) = _node_userId n .== user_id us
q u' = proc () -> do
(n,usrs) <- join -< ()
restrict -< user_username usrs .== (toNullable $ pgStrictText u')
restrict -< _node_typename n .== (pgInt4 $ nodeTypeId nt)
returnA -< _node_id n
......@@ -65,6 +65,7 @@ newtype NodeId = NodeId Int
instance ToField NodeId where
toField (NodeId n) = toField n
instance FromField NodeId where
fromField field mdata = do
n <- fromField field mdata
......
......@@ -24,9 +24,11 @@ import Data.List (unzip, sortOn)
import Data.Map (toList)
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Database.Config
import Gargantext.Database.Schema.NodeNode (selectDocsDates)
import Gargantext.Database.Utils
import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Node.Select
import Gargantext.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Types.Main
......@@ -68,6 +70,7 @@ pieData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m Histo
pieData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
let
......@@ -78,7 +81,7 @@ pieData cId nt lt = do
Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId nt terms
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = unzip $ map (\(t,(d,_)) -> (t, d)) $ Map.toList mapTerms
pure (Histo dates (map round count))
......@@ -89,6 +92,7 @@ treeData :: FlowCmdM env err m
=> CorpusId -> NgramsType -> ListType
-> m [MyTree]
treeData cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
......@@ -96,7 +100,7 @@ treeData cId nt lt = do
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId nt terms
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ toTree lt cs' m
......@@ -106,6 +110,7 @@ treeData' :: FlowCmdM env ServantErr m
=> CorpusId -> NgramsType -> ListType
-> m [MyTree]
treeData' cId nt lt = do
ls' <- selectNodesWithUsername NodeList userMaster
ls <- map (_node_id) <$> getListsWithParentId cId
ts <- mapTermListRoot ls nt
......@@ -113,10 +118,9 @@ treeData' cId nt lt = do
dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ Map.toList dico
cs' <- getNodesByNgramsOnlyUser cId nt terms
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt
pure $ toTree lt cs' m
......@@ -28,8 +28,10 @@ import Control.Monad.IO.Class (liftIO)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Config
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (getNode)
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node hiding (node_id) -- (GraphId, ListId, CorpusId, NodeId)
......@@ -65,13 +67,14 @@ getGraph nId = do
]
-- (map (\n -> LegendField n "#FFFFFF" (pack $ show n)) [1..10])
let cId = maybe (panic "no parentId") identity $ _node_parentId nodeGraph
lIds <- selectNodesWithUsername NodeList userMaster
lId <- defaultList cId
ngs <- filterListWithRoot GraphTerm <$> mapTermListRoot [lId] NgramsTerms
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal False)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId NgramsTerms (Map.keys ngs)
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) NgramsTerms (Map.keys ngs)
graph <- liftIO $ cooc2graph myCooc
pure $ set graph_metadata (Just metadata)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment