[ngrams] small refactorings

parent b0ca0a6a
......@@ -34,7 +34,8 @@ import Gargantext.API.Prelude (GargServer, GargM, GargError)
import Gargantext.API.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatterns, termsInText)
import Gargantext.Core.Types (TermsCount)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
......@@ -166,15 +167,7 @@ reIndexWith cId lId nt lts = do
-- fromListWith (<>)
ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts)
$ Text.unlines $ catMaybes
[ doc ^. context_hyperdata . hd_title
, doc ^. context_hyperdata . hd_abstract
]
)
(List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
) docs
$ map (docNgrams nt ts) docs
-- printDebug "ngramsByDoc: " ngramsByDoc
......@@ -183,6 +176,21 @@ reIndexWith cId lId nt lts = do
-- _ <- refreshNgramsMaterialized
pure ()
-- | Pair every list term matched in a document with the occurrence map
-- recorded for that document.
--
-- The text that is searched is built from the document's title and abstract
-- (whichever of the two are present), each terminated by a newline.  Every
-- term of @ts@ is split on single spaces before being given to
-- 'buildPatterns'.  Each match returned by 'termsInText' is paired with the
-- same map: @nt@ -> (context id of @doc@ -> 1).
docNgrams :: NgramsType
          -> [NgramsTerm]
          -> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
          -> [((MatchedText, TermsCount),
                Map NgramsType (Map NodeId Int))]
docNgrams nt ts doc =
  [ (matched, occurrences) | matched <- termsInText patterns docText ]
  where
    -- One pattern per term; the second tuple component is left empty.
    patterns =
      buildPatterns [ (Text.splitOn " " $ unNgramsTerm term, []) | term <- ts ]

    -- Title and abstract, skipping whichever is missing.
    docText =
      Text.unlines $ catMaybes
        [ doc ^. context_hyperdata . hd_title
        , doc ^. context_hyperdata . hd_abstract
        ]

    -- The same per-document map is attached to every match.
    occurrences =
      Map.singleton nt (Map.singleton (doc ^. context_id) 1)
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n
where
......
......@@ -227,24 +227,29 @@ getContextNgramsMatchingFTS :: HasNodeError err
-> NodeId
-> Cmd err [Text]
getContextNgramsMatchingFTS contextId listId = do
res <- runPGSQuery query (listId, listId, contextId)
res <- runPGSQuery query (listId, contextId)
pure $ (\(PGS.Only term) -> term) <$> res
where
query :: PGS.Query
query = [sql| WITH ngrams_ids AS
query = [sql| WITH constants AS
(SELECT ? AS list_id, ? AS context_id),
ngrams_ids AS
(SELECT ngrams_id
FROM node_stories
WHERE node_id = ?
CROSS JOIN constants
WHERE node_id = constants.list_id
UNION SELECT ngrams_id
FROM node_ngrams
WHERE node_id = ?)
CROSS JOIN constants
WHERE node_id = constants.list_id)
SELECT DISTINCT ngrams.terms
FROM ngrams
JOIN ngrams_ids ON ngrams_ids.ngrams_id = ngrams.id
CROSS JOIN constants
-- JOIN node_ngrams ON node_ngrams.ngrams_id = ngrams.id
CROSS JOIN contexts
WHERE contexts.id = ?
WHERE contexts.id = constants.context_id
-- AND node_ngrams.node_id = ?
AND (contexts.search @@ plainto_tsquery(ngrams.terms)
OR contexts.search @@ plainto_tsquery('french', ngrams.terms)) |]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment