Commit 4e9b6f41 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Ngrams in list

parent 04da4749
......@@ -39,7 +39,7 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
import Gargantext.Database.Types (Indexed(..))
......@@ -187,7 +187,7 @@ reIndexWith cId lId nt lts = do
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
) docs
) (map context2node docs)
-- printDebug "ngramsByDoc" ngramsByDoc
......
......@@ -19,6 +19,7 @@ module Gargantext.API.Node.Corpus.Export
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Set (Set)
import Data.Text (Text)
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Set as Set
......@@ -39,9 +40,9 @@ import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
import Gargantext.Prelude
--------------------------------------------------
......@@ -62,31 +63,32 @@ getCorpus cId lId nt' = do
Just l -> pure l
ns <- Map.fromList
<$> map (\n -> (_node_id n, n))
<$> map (\n -> (_context_id n, n))
<$> selectDocNodes cId
repo <- getRepo' [listId]
ngs <- getNodeNgrams cId listId nt repo
ngs <- getContextNgrams cId listId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith
(\a b -> DocumentExport.Document { _d_document = a
(\a b -> DocumentExport.Document { _d_document = context2node a
, _d_ngrams = DocumentExport.Ngrams (Set.toList b) (hash b)
, _d_hash = d_hash a b }
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
d_hash :: Context HyperdataDocument -> Set Text -> Text
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
, hash b
]
pure $ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
getNodeNgrams :: HasNodeError err
getContextNgrams :: HasNodeError err
=> CorpusId
-> ListId
-> NgramsType
-> NodeListStory
-> Cmd err (Map NodeId (Set NgramsTerm))
getNodeNgrams cId lId nt repo = do
-> Cmd err (Map ContextId (Set NgramsTerm))
getContextNgrams cId lId nt repo = do
-- lId <- case lId' of
-- Nothing -> defaultList cId
-- Just l -> pure l
......
......@@ -159,11 +159,11 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
-- Filter 0 With Double
-- Computing global speGen score
printDebug "[buldNgramsTermsList: Sample List] / start" nt
printDebug "[buildNgramsTermsList: Sample List] / start" nt
allTerms :: HashMap NgramsTerm Double <- getTficf_withSample uCid mCid nt
printDebug "[buldNgramsTermsList: Sample List / end]" nt
printDebug "[buildNgramsTermsList: Sample List / end]" nt
printDebug "[buldNgramsTermsList: Flow Social List / start]" nt
printDebug "[buildNgramsTermsList: Flow Social List / start]" nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists :: FlowCont NgramsTerm FlowListScores
<- flowSocialList mfslw user nt ( FlowCont HashMap.empty
......@@ -171,7 +171,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
$ List.zip (HashMap.keys allTerms)
(List.cycle [mempty])
)
printDebug "[buldNgramsTermsList: Flow Social List / end]" nt
printDebug "[buildNgramsTermsList: Flow Social List / end]" nt
let ngramsKeys = HashMap.keysSet allTerms
......
......@@ -32,7 +32,7 @@ import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Core.Types
import Gargantext.Core (HasDBid)
......
......@@ -268,7 +268,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
let gp = GroupWithPosTag l CoreNLP HashMap.empty
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
printDebug "flowCorpusUser:ngs" ngs
-- printDebug "flowCorpusUser:ngs" ngs
_userListId <- flowList_DbRepo listId ngs
_mastListId <- getOrMkList masterCorpusId masterUserId
......@@ -329,8 +329,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
printDebug "saveDocNgramsWith" mapCgramsId
-- printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
_return <- insertContextNodeNgrams2
$ catMaybes [ ContextNodeNgrams2 <$> Just nId
......
......@@ -21,7 +21,6 @@ import Control.Concurrent
import Control.Lens ((^.), (+~), (%~), at, (.~), _Just)
import Control.Monad.Reader
import Data.Map (Map, toList)
import Data.Maybe (catMaybes)
import Data.Text (Text)
import Gargantext.API.Ngrams (saveNodeStory)
import Gargantext.API.Ngrams.Tools (getNodeStoryVar)
......@@ -31,8 +30,8 @@ import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb, getCgramsId)
import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW{-, listInsertDb, getCgramsId -})
-- import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import qualified Data.List as List
......@@ -89,18 +88,21 @@ flowList_DbRepo :: FlowCmdM env err m
-> m ListId
flowList_DbRepo lId ngs = do
-- printDebug "listId flowList" lId
{-
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement { _ne_ngrams = NgramsTerm ngram
, _ne_parent = parent } <- ngs'
]
-}
-- Inserting groups of ngrams
_r <- insert_Node_NodeNgrams_NodeNgrams
$ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
-- _r <- insert_Node_NodeNgrams_NodeNgrams
-- $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
printDebug "flowList_Tficf':ngs" ngs
-- printDebug "flowList_Tficf':ngs" ngs
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
......
......@@ -208,16 +208,16 @@ selectNgramsOccurrencesOnlyByContextUser cId nt tms =
queryNgramsOccurrencesOnlyByContextUser :: DPS.Query
queryNgramsOccurrencesOnlyByContextUser = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node_id = cng.node_id
JOIN nodes n ON nn.node_id = n.id
WHERE nn.node1_id = ? -- CorpusId
SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN nodes n ON nn.node_id = n.id
WHERE nn.node_id = ? -- CorpusId
AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY cng.node_id, ng.terms
GROUP BY cng.context_id, ng.terms
|]
......@@ -242,17 +242,17 @@ selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt tms =
queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_nodes nn ON n.id = nn.node2_id
WITH nodes_sample AS (SELECT id FROM contexts n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_contexts nn ON n.id = nn.context_id
WHERE n.typename = ?
AND nn.node1_id = ?),
AND nn.node_id = ?),
input_rows(terms) AS (?)
SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng
SELECT ng.terms, COUNT(cng.context_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = cng.node_id
JOIN nodes_sample n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN nodes_sample n ON nn.context_id = n.id
WHERE nn.node_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY cng.node_id, ng.terms
......
......@@ -46,6 +46,8 @@ getTficf cId mId nt = do
countLocal <- selectCountDocs cId
countGlobal <- selectCountDocs mId
printDebug "getTficf" (mapTextDoubleLocal, mapTextDoubleGlobal, countLocal, countGlobal)
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal))
......@@ -71,6 +73,7 @@ getTficf_withSample cId mId nt = do
<$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
(HM.keys mapTextDoubleLocal)
printDebug "getTficf_withSample" (mapTextDoubleLocal, mapTextDoubleGlobal, countLocal, countGlobal)
pure $ HM.mapWithKey (\t n ->
tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal))
......
......@@ -18,7 +18,7 @@ module Gargantext.Database.Admin.Trigger.ContextNodeNgrams
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
-- import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude
......@@ -105,6 +105,7 @@ triggerCountInsert2 = execPGSQuery query ( toDBid NodeCorpus
-- TODO add the groups
-- TODO use context instead of nodes of type doc
{-
triggerCoocInsert :: HasDBid NodeType => Cmd err Int64
triggerCoocInsert = execPGSQuery query ( toDBid NodeCorpus
, toDBid NodeDocument
......@@ -160,3 +161,4 @@ triggerCoocInsert = execPGSQuery query ( toDBid NodeCorpus
FOR EACH STATEMENT
EXECUTE PROCEDURE set_cooc();
|]
-}
......@@ -21,7 +21,7 @@ import Database.PostgreSQL.Simple.SqlQQ (sql)
import Gargantext.Core
import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Core.Types.Main (ListType(CandidateTerm))
-- import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
......@@ -157,6 +157,7 @@ triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList)
|]
-- TODO add groups
{-
triggerCoocInsert :: MasterListId -> Cmd err Int64
triggerCoocInsert lid = execPGSQuery query ( lid
-- , nodeTypeId NodeCorpus
......@@ -213,4 +214,4 @@ triggerCoocInsert lid = execPGSQuery query ( lid
FOR EACH STATEMENT
EXECUTE PROCEDURE nodes_nodes_set_cooc();
|]
-}
......@@ -141,7 +141,7 @@ selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
restrict -< nc^.nc_node_id .== (toNullable $ pgNodeId cId')
restrict -< nc^.nc_category .>= (toNullable $ sqlInt4 1)
restrict -< c^.context_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< c
returnA -< c
-- | TODO use UTCTime fast
......@@ -177,7 +177,7 @@ joinInCorpus :: O.Select (ContextRead, NodeContextReadNull)
joinInCorpus = leftJoin queryContextTable queryNodeContextTable cond
where
cond :: (ContextRead, NodeContextRead) -> Column SqlBool
cond (c, nc) = c^.context_id .== nc^.nc_node_id
cond (c, nc) = c^.context_id .== nc^.nc_context_id
joinOn1 :: O.Select (NodeRead, NodeContextReadNull)
......
......@@ -21,21 +21,15 @@ commentary with @some markup@.
module Gargantext.Database.Query.Table.NodeNode
( module Gargantext.Database.Schema.NodeNode
, queryNodeNodeTable
, selectNodesDates
, selectDocNodes
, selectDocs
, getNodeNode
, insertNodeNode
, deleteNodeNode
, selectPublicNodes
, selectCountDocs
)
where
import Control.Arrow (returnA)
import Control.Lens (view, (^.))
import Data.Maybe (catMaybes)
import Data.Text (Text, splitOn)
import Control.Lens ((^.))
import qualified Opaleye as O
import Opaleye
......@@ -119,59 +113,6 @@ deleteNodeNode n1 n2 = mkCmd $ \conn ->
rCount
)
------------------------------------------------------------------------
selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
where
queryCountDocs cId' = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId')
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
-- | TODO use UTCTime fast
selectNodesDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
selectNodesDates cId = map (head' "selectDocsDates" . splitOn "-")
<$> catMaybes
<$> map (view hd_publication_date)
<$> selectDocs cId
selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
selectDocs cId = runOpaQuery (queryDocs cId)
queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Column SqlJsonb)
queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< view (node_hyperdata) n
selectDocNodes :: HasDBid NodeType =>CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: HasDBid NodeType =>CorpusId -> O.Select NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
joinInCorpus :: O.Select (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node2_id .== (view node_id n)
joinOn1 :: O.Select (NodeRead, NodeNodeReadNull)
joinOn1 = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node1_id .== n^.node_id
------------------------------------------------------------------------
selectPublicNodes :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
=> Cmd err [(Node a, Maybe Int)]
......@@ -183,3 +124,8 @@ queryWithType nt = proc () -> do
restrict -< n^.node_typename .== (sqlInt4 $ toDBid nt)
returnA -< (n, nn^.nn_node2_id)
joinOn1 :: O.Select (NodeRead, NodeNodeReadNull)
joinOn1 = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node1_id .== n^.node_id
......@@ -29,11 +29,9 @@ Next Step benchmark:
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
( module Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
, insert_Node_NodeNgrams_NodeNgrams
)
where
{-
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Prelude (Cmd, runOpaQuery, mkCmd)
import Gargantext.Database.Admin.Types.Node (pgNodeId)
......@@ -68,3 +66,4 @@ insert_Node_NodeNgrams_NodeNgrams_W ns =
, iReturning = rCount
, iOnConflict = (Just DoNothing)
}
-}
......@@ -31,6 +31,7 @@ Next Step benchmark:
module Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
where
{-
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Schema.Node()
......@@ -84,4 +85,4 @@ instance DefaultFromField SqlInt4 (Maybe Int) where
instance DefaultFromField SqlFloat8 (Maybe Double) where
defaultFromField = fromPGSFromField
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment