[ngrams] small refactorings

parent b0ca0a6a
...@@ -34,7 +34,8 @@ import Gargantext.API.Prelude (GargServer, GargM, GargError) ...@@ -34,7 +34,8 @@ import Gargantext.API.Prelude (GargServer, GargM, GargError)
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..)) import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatterns, termsInText)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith) import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
...@@ -166,15 +167,7 @@ reIndexWith cId lId nt lts = do ...@@ -166,15 +167,7 @@ reIndexWith cId lId nt lts = do
-- fromListWith (<>) -- fromListWith (<>)
ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b'))))) ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v))) $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (\doc -> List.zip $ map (docNgrams nt ts) docs
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts)
$ Text.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs
-- printDebug "ngramsByDoc: " ngramsByDoc -- printDebug "ngramsByDoc: " ngramsByDoc
...@@ -183,6 +176,21 @@ reIndexWith cId lId nt lts = do ...@@ -183,6 +176,21 @@ reIndexWith cId lId nt lts = do
-- _ <- refreshNgramsMaterialized -- _ <- refreshNgramsMaterialized
pure () pure ()
-- | Tag every match that 'termsInText' returns for one document with the
-- same index entry.
--
-- The searched text is the document's title and abstract (whichever are
-- present), joined with 'Text.unlines'. The entry attached to each match
-- maps @nt@ to a singleton map from the document's context id to @1@.
docNgrams :: NgramsType
          -> [NgramsTerm]
          -> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
          -> [((MatchedText, TermsCount),
                Map NgramsType (Map NodeId Int))]
docNgrams nt ts doc = map (\match -> (match, indexEntry)) matches
  where
    -- One pattern per term: the term text split on single spaces, paired
    -- with an empty list as the second component expected by 'buildPatterns'.
    patterns = buildPatterns $ map (\term -> (Text.splitOn " " $ unNgramsTerm term, [])) ts

    -- Missing title or abstract fields are simply dropped.
    searchedText = Text.unlines $ catMaybes
      [ doc ^. context_hyperdata . hd_title
      , doc ^. context_hyperdata . hd_abstract
      ]

    matches = termsInText patterns searchedText

    -- Identical for every match of this document.
    indexEntry = Map.singleton nt $ Map.singleton (doc ^. context_id) 1
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams) toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n toIndexedNgrams m t = Indexed <$> i <*> n
where where
......
...@@ -227,24 +227,29 @@ getContextNgramsMatchingFTS :: HasNodeError err ...@@ -227,24 +227,29 @@ getContextNgramsMatchingFTS :: HasNodeError err
-> NodeId -> NodeId
-> Cmd err [Text] -> Cmd err [Text]
getContextNgramsMatchingFTS contextId listId = do getContextNgramsMatchingFTS contextId listId = do
res <- runPGSQuery query (listId, listId, contextId) res <- runPGSQuery query (listId, contextId)
pure $ (\(PGS.Only term) -> term) <$> res pure $ (\(PGS.Only term) -> term) <$> res
where where
query :: PGS.Query query :: PGS.Query
query = [sql| WITH ngrams_ids AS query = [sql| WITH constants AS
(SELECT ? AS list_id, ? AS context_id),
ngrams_ids AS
(SELECT ngrams_id (SELECT ngrams_id
FROM node_stories FROM node_stories
WHERE node_id = ? CROSS JOIN constants
WHERE node_id = constants.list_id
UNION SELECT ngrams_id UNION SELECT ngrams_id
FROM node_ngrams FROM node_ngrams
WHERE node_id = ?) CROSS JOIN constants
WHERE node_id = constants.list_id)
SELECT DISTINCT ngrams.terms SELECT DISTINCT ngrams.terms
FROM ngrams FROM ngrams
JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id
CROSS JOIN constants
-- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id -- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id
CROSS JOIN contexts CROSS JOIN contexts
WHERE contexts.id = ? WHERE contexts.id = constants.context_id
-- AND node_ngrams.node_id = ? -- AND node_ngrams.node_id = ?
AND (contexts.search @@ plainto_tsquery(ngrams.terms) AND (contexts.search @@ plainto_tsquery(ngrams.terms)
OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |] OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment