Commit 2b0c0c9b authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Score by Doc or Corpus.

parent 7643b2ea
......@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySafe)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySlow)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection)
......@@ -236,7 +236,7 @@ ngramsElementFromRepo
, _ne_parent = p
, _ne_children = c
, _ne_ngrams = ngrams
, _ne_occurrences = panic "API.Ngrams._ne_occurrences"
, _ne_occurrences = 0 -- panic "API.Ngrams._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
......@@ -875,14 +875,14 @@ type MaxSize = Int
getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
=> NodeId -> TabType
=> NodeType -> NodeId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
-> m (Versioned NgramsTable)
getTableNgrams nId tabType listId limit_ offset
getTableNgrams nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
lIds <- selectNodesWithUsername NodeList userMaster
......@@ -902,12 +902,14 @@ getTableNgrams nId tabType listId limit_ offset
selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
---------------------------------------
sortOnOrder Nothing = identity
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
---------------------------------------
selectAndPaginate tableMap (NgramsTable list) = NgramsTable $ roots <> inners
where
rootOf ne = maybe ne (\r -> ngramsElementFromRepo (r, fromMaybe (panic "getTableNgrams: invalid root") (tableMap ^. v_data . at r)))
......@@ -920,14 +922,20 @@ getTableNgrams nId tabType listId limit_ offset
rootsSet = Set.fromList (_ne_ngrams <$> roots)
inners = list & filter (selected_inner rootsSet)
---------------------------------------
setScores False table = pure table
setScores True table = do
occurrences <- getOccByNgramsOnlySafe nId (lIds <> [listId]) ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
let ngrams_terms = (table ^.. v_data . _NgramsTable . each . ne_ngrams)
occurrences <- getOccByNgramsOnlySlow nType nId
(lIds <> [listId])
ngramsType
ngrams_terms
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & v_data . _NgramsTable . each %~ setOcc
---------------------------------------
-- lists <- catMaybes <$> listsWith userMaster
-- trace (show lists) $
......@@ -1003,7 +1011,7 @@ getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env
-> Maybe Text -- full text search
-> m (Versioned NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
where
searchQuery = maybe (const True) isInfixOf mt
......@@ -1021,7 +1029,10 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
let searchQuery = flip S.member (S.fromList ngs)
getTableNgrams dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
......
......@@ -28,6 +28,7 @@ Portability : POSIX
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
--import Debug.Trace (trace)
import Control.Lens ((^.), view, Lens', _Just)
import Control.Monad (mapM_)
import Control.Monad.IO.Class (liftIO)
......@@ -126,7 +127,6 @@ flowCorpusSearchInDatabase u la q = do
------------------------------------------------------------------------
flow :: (FlowCmdM env ServantErr m, FlowCorpus a, MkCorpus c)
=> Maybe c -> Username -> CorpusName -> TermType Lang -> [[a]] -> m CorpusId
flow c u cn la docs = do
......@@ -177,10 +177,7 @@ insertMasterDocs c lang hs = do
ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (DM.fromList $ map viewUniqId' hs')
docsWithNgrams <- documentIdWithNgrams (extractNgramsT lang) documentsWithId
let maps = mapNodeIdNgrams docsWithNgrams
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
terms2id <- insertNgrams $ DM.keys maps
let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps
......
......@@ -162,17 +162,21 @@ getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: CorpusId -> [ListId] -> NgramsType -> [Text]
getOccByNgramsOnlySlow :: NodeType -> CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySlow cId ls nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId ls nt ngs
getOccByNgramsOnlySlow t cId ls nt ngs =
Map.map Set.size <$> getScore' t cId ls nt ngs
where
getScore' NodeCorpus = getNodesByNgramsOnlyUser
getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow cId ls nt ngs
slow <- getOccByNgramsOnlySlow NodeCorpus cId ls nt ngs
when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference" (diff slow fast :: PatchMap Text (Replace (Maybe Int)))
pure slow
......@@ -209,7 +213,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
getNodesByNgramsOnlyUser :: CorpusId -> [ListId] -> NgramsType -> [Text]
getNodesByNgramsOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs = Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
......@@ -248,6 +252,39 @@ queryNgramsOnlyByNodeUser = [sql|
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = Map.unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByDocUser :: DocId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms =
runPGSQuery queryNgramsOnlyByDocUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, dId
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByDocUser :: DPS.Query
queryNgramsOnlyByDocUser = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
WHERE nng.node2_id = ? -- DocId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
......
......@@ -55,28 +55,26 @@ buildNgramsLists l n m s uCid mCid = do
pure $ Map.unions $ othersTerms <> [ngTerms]
buildNgramsOthersList :: UserCorpusId -> (Text -> Text) -> NgramsType
buildNgramsOthersList :: UserCorpusId -> (Text -> Text) -> NgramsType
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsOthersList uCid groupIt nt = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt
pure $ Map.fromList [(nt, [ mkNgramsElement t CandidateTerm Nothing (mSetFromList [])
| (t,_ns) <- Map.toList ngs
let
all' = Map.toList ngs
pure $ (toElements GraphTerm $ take 10 all') <> (toElements CandidateTerm $ drop 10 all')
where
toElements nType x = Map.fromList [(nt, [ mkNgramsElement t nType Nothing (mSetFromList [])
| (t,_ns) <- x
]
)
]
-- TODO remove hard coded parameters
buildNgramsTermsList :: Lang -> Int -> Int -> StopSize -> UserCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList l n m s uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
--printDebug "candidate" (length candidates)
let termList = toTermList ((isStopTerm s) . fst) candidates
--let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList)
let ngs = List.concat $ map toNgramsElement termList
pure $ Map.fromList [(NgramsTerms, ngs)]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment