Commit a1bda6a4 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] BUG limit on Nodes by Ngrams count.

parent 2b0c0c9b
...@@ -39,7 +39,7 @@ import Data.Monoid ...@@ -39,7 +39,7 @@ import Data.Monoid
import Data.Text (Text, splitOn, intercalate) import Data.Text (Text, splitOn, intercalate)
import GHC.Show (Show) import GHC.Show (Show)
import Gargantext.API.Ngrams (HasRepoVar) import Gargantext.API.Ngrams (HasRepoVar)
import Gargantext.API.Ngrams (NgramsElement, putListNgrams, RepoCmdM) import Gargantext.API.Ngrams (NgramsElement(..), putListNgrams, RepoCmdM)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..)) import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username) import Gargantext.Core.Types.Individu (Username)
...@@ -65,7 +65,8 @@ import Gargantext.Text.Terms (extractTerms) ...@@ -65,7 +65,8 @@ import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
import Servant (ServantErr) import Servant (ServantErr)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.Map as DM --import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Gargantext.Database.Node.Document.Add as Doc (add) import qualified Gargantext.Database.Node.Document.Add as Doc (add)
import qualified Gargantext.Text.Parsers.GrandDebat as GD import qualified Gargantext.Text.Parsers.GrandDebat as GD
...@@ -175,11 +176,11 @@ insertMasterDocs c lang hs = do ...@@ -175,11 +176,11 @@ insertMasterDocs c lang hs = do
-- TODO Type NodeDocumentUnicised -- TODO Type NodeDocumentUnicised
let hs' = map addUniqId hs let hs' = map addUniqId hs
ids <- insertDb masterUserId masterCorpusId hs' ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (DM.fromList $ map viewUniqId' hs') let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang) documentsWithId
terms2id <- insertNgrams $ DM.keys maps terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
lId <- getOrMkList masterCorpusId masterUserId lId <- getOrMkList masterCorpusId masterUserId
_ <- insertDocNgrams lId indexedNgrams _ <- insertDocNgrams lId indexedNgrams
...@@ -247,7 +248,7 @@ viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d) ...@@ -247,7 +248,7 @@ viewUniqId' d = maybe err (\h -> (h,d)) (view uniqId d)
toInserted :: [ReturnId] -> Map HashId ReturnId toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted = DM.fromList . map (\r -> (reUniqId r, r) ) toInserted = Map.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True) . filter (\r -> reInserted r == True)
data DocumentWithId a = DocumentWithId data DocumentWithId a = DocumentWithId
...@@ -258,7 +259,7 @@ data DocumentWithId a = DocumentWithId ...@@ -258,7 +259,7 @@ data DocumentWithId a = DocumentWithId
mergeData :: Map HashId ReturnId mergeData :: Map HashId ReturnId
-> Map HashId a -> Map HashId a
-> [DocumentWithId a] -> [DocumentWithId a]
mergeData rs = catMaybes . map toDocumentWithId . DM.toList mergeData rs = catMaybes . map toDocumentWithId . Map.toList
where where
toDocumentWithId (hash,hpd) = toDocumentWithId (hash,hpd) =
DocumentWithId <$> fmap reId (lookup hash rs) DocumentWithId <$> fmap reId (lookup hash rs)
...@@ -288,7 +289,7 @@ instance ExtractNgramsT HyperdataContact ...@@ -288,7 +289,7 @@ instance ExtractNgramsT HyperdataContact
$ maybe ["Nothing"] (\a -> [a]) $ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc' $ view (hc_who . _Just . cw_lastName) hc'
pure $ DM.fromList $ [(a', DM.singleton Authors 1) | a' <- authors ] pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ]
...@@ -325,15 +326,15 @@ extractNgramsT' lang hd = filterNgramsT 255 <$> extractNgramsT'' lang hd ...@@ -325,15 +326,15 @@ extractNgramsT' lang hd = filterNgramsT 255 <$> extractNgramsT'' lang hd
<$> concat <$> concat
<$> liftIO (extractTerms lang' leText) <$> liftIO (extractTerms lang' leText)
pure $ DM.fromList $ [(source, DM.singleton Sources 1)] pure $ Map.fromList $ [(source, Map.singleton Sources 1)]
<> [(i', DM.singleton Institutes 1) | i' <- institutes ] <> [(i', Map.singleton Institutes 1) | i' <- institutes ]
<> [(a', DM.singleton Authors 1) | a' <- authors ] <> [(a', Map.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ] <> [(t', Map.singleton NgramsTerms 1) | t' <- terms' ]
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int) filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
-> Map Ngrams (Map NgramsType Int) -> Map Ngrams (Map NgramsType Int)
filterNgramsT s ms = DM.fromList $ map (\a -> filter' s a) $ DM.toList ms filterNgramsT s ms = Map.fromList $ map (\a -> filter' s a) $ Map.toList ms
where where
filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of filter' s' (ng@(Ngrams t n),y) = case (Text.length t) < s' of
True -> (ng,y) True -> (ng,y)
...@@ -357,11 +358,11 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams ...@@ -357,11 +358,11 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams a] mapNodeIdNgrams :: [DocumentIdWithNgrams a]
-> Map Ngrams (Map NgramsType (Map NodeId Int)) -> Map Ngrams (Map NgramsType (Map NodeId Int))
mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
where where
f :: DocumentIdWithNgrams a f :: DocumentIdWithNgrams a
-> Map Ngrams (Map NgramsType (Map NodeId Int)) -> Map Ngrams (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d f d = fmap (fmap (Map.singleton nId)) $ document_ngrams d
where where
nId = documentId $ documentWithId d nId = documentId $ documentWithId d
...@@ -380,5 +381,6 @@ flowList uId cId ngs = do ...@@ -380,5 +381,6 @@ flowList uId cId ngs = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
printDebug "listId flowList" lId printDebug "listId flowList" lId
listInsert lId ngs listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId pure lId
...@@ -126,20 +126,21 @@ getNodesByNgramsUser :: CorpusId -> NgramsType ...@@ -126,20 +126,21 @@ getNodesByNgramsUser :: CorpusId -> NgramsType
getNodesByNgramsUser cId nt = getNodesByNgramsUser cId nt =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n)) fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
<$> selectNgramsByNodeUser cId nt <$> selectNgramsByNodeUser cId nt
where
selectNgramsByNodeUser :: CorpusId -> NgramsType selectNgramsByNodeUser :: CorpusId -> NgramsType
-> Cmd err [(NodeId, Text)] -> Cmd err [(NodeId, Text)]
selectNgramsByNodeUser cId nt = selectNgramsByNodeUser cId' nt' =
runPGSQuery queryNgramsByNodeUser runPGSQuery queryNgramsByNodeUser
( cId ( cId'
, nodeTypeId NodeDocument , nodeTypeId NodeDocument
, ngramsTypeId nt , ngramsTypeId nt'
, 1000 :: Int -- limit -- , 100 :: Int -- limit
, 0 :: Int -- offset -- , 0 :: Int -- offset
) )
queryNgramsByNodeUser :: DPS.Query queryNgramsByNodeUser :: DPS.Query
queryNgramsByNodeUser = [sql| queryNgramsByNodeUser = [sql|
SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng SELECT nng.node2_id, ng.terms FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
...@@ -151,8 +152,8 @@ queryNgramsByNodeUser = [sql| ...@@ -151,8 +152,8 @@ queryNgramsByNodeUser = [sql|
AND nn.delete = False AND nn.delete = False
GROUP BY nng.node2_id, ng.terms GROUP BY nng.node2_id, ng.terms
ORDER BY (nng.node2_id, ng.terms) DESC ORDER BY (nng.node2_id, ng.terms) DESC
LIMIT ? -- LIMIT ?
OFFSET ? -- OFFSET ?
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO add groups -- TODO add groups
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment