Commit 17fbec42 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] doc ngrams insertion (WIP)

parent 6507a2d2
Pipeline #1456 failed with stage
...@@ -27,18 +27,18 @@ import GHC.Generics (Generic) ...@@ -27,18 +27,18 @@ import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams) import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (RepoCmdM, Versioned(..), NgramsList, NgramsTerm(..)) import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..)) import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Prelude (GargServer, GargNoServer) import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes) import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
...@@ -102,11 +102,14 @@ post l m = do ...@@ -102,11 +102,14 @@ post l m = do
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here) -- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith :: CorpusId reIndexWith :: ( HasRepo env
, FlowCmdM env err m
)
=> CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> Set ListType -> Set ListType
-> GargNoServer () -> m ()
reIndexWith cId lId nt lts = do reIndexWith cId lId nt lts = do
-- Getting [NgramsTerm] -- Getting [NgramsTerm]
ts <- List.concat ts <- List.concat
...@@ -125,21 +128,16 @@ reIndexWith cId lId nt lts = do ...@@ -125,21 +128,16 @@ reIndexWith cId lId nt lts = do
Just n -> if n == 1 then [t] else [ ] Just n -> if n == 1 then [t] else [ ]
) ts ) ts
-- Getting the Id of orphan ngrams
mapTextNgramsId <- insertNgrams (map (text2ngrams . unNgramsTerm) orphans)
printDebug "orphans" orphans
-- Get all documents of the corpus -- Get all documents of the corpus
docs <- selectDocNodes cId docs <- selectDocNodes cId
printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match -- Checking Text documents where orphans match
-- TODO Tests here -- TODO Tests here
let let
ngramsByDoc = List.concat ngramsByDoc = HashMap.fromList
$ map (\doc -> List.zip $ map (\(k,v) -> (SimpleNgrams (text2ngrams k), v))
$ List.concat
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> ([unNgramsTerm k], [])) orphans) (termsInText (buildPatterns $ map (\k -> ([unNgramsTerm k], [])) orphans)
$ Text.unlines $ catMaybes $ Text.unlines $ catMaybes
[ doc ^. node_hyperdata . hd_title [ doc ^. node_hyperdata . hd_title
...@@ -152,12 +150,9 @@ reIndexWith cId lId nt lts = do ...@@ -152,12 +150,9 @@ reIndexWith cId lId nt lts = do
printDebug "ngramsByDoc" ngramsByDoc printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database -- Saving the indexation in database
_ <- insertDocNgrams lId ( HashMap.fromList _ <- saveDocNgramsWith lId ngramsByDoc
$ catMaybes
$ map (\(t,d) -> (,) <$> toIndexedNgrams mapTextNgramsId t pure () -- ngramsByDoc
<*> Just d ) ngramsByDoc
)
pure ()
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams) toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n toIndexedNgrams m t = Indexed <$> i <*> n
......
...@@ -118,13 +118,13 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do ...@@ -118,13 +118,13 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1 logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 2 , _scst_remaining = Just 2
, _scst_events = Just [] , _scst_events = Just []
} }
corpusId <- view node_parent_id <$> getNode nId corpusId <- view node_parent_id <$> getNode lId
logStatus JobLog { _scst_succeeded = Just 2 logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0 , _scst_failed = Just 0
...@@ -133,7 +133,7 @@ updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do ...@@ -133,7 +133,7 @@ updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do
} }
_ <- case corpusId of _ <- case corpusId of
Just cId -> reIndexWith cId nId NgramsTerms (Set.singleton MapTerm) Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
Nothing -> pure () Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3 pure JobLog { _scst_succeeded = Just 3
......
...@@ -11,6 +11,7 @@ Portability : POSIX ...@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.API.Prelude module Gargantext.API.Prelude
( module Gargantext.API.Prelude ( module Gargantext.API.Prelude
...@@ -30,10 +31,6 @@ import Crypto.JOSE.Error as Jose ...@@ -30,10 +31,6 @@ import Crypto.JOSE.Error as Jose
import Data.Aeson.Types import Data.Aeson.Types
import Data.Typeable import Data.Typeable
import Data.Validity import Data.Validity
import Servant
import Servant.Job.Async
import Servant.Job.Core (HasServerError(..), serverError)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Types import Gargantext.API.Admin.Types
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
...@@ -42,6 +39,9 @@ import Gargantext.Database.Prelude ...@@ -42,6 +39,9 @@ import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (NodeError(..), HasNodeError(..)) import Gargantext.Database.Query.Table.Node.Error (NodeError(..), HasNodeError(..))
import Gargantext.Database.Query.Tree import Gargantext.Database.Query.Tree
import Gargantext.Prelude import Gargantext.Prelude
import Servant
import Servant.Job.Async
import Servant.Job.Core (HasServerError(..), serverError)
class HasJoseError e where class HasJoseError e where
_JoseError :: Prism' e Jose.Error _JoseError :: Prism' e Jose.Error
......
...@@ -33,6 +33,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -33,6 +33,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpus , flowCorpus
, flowAnnuaire , flowAnnuaire
, insertMasterDocs , insertMasterDocs
, saveDocNgramsWith
, getOrMkRoot , getOrMkRoot
, getOrMk_RootWithCorpus , getOrMk_RootWithCorpus
...@@ -280,6 +281,17 @@ insertMasterDocs c lang hs = do ...@@ -280,6 +281,17 @@ insertMasterDocs c lang hs = do
(extractNgramsT $ withLang lang documentsWithId) (extractNgramsT $ withLang lang documentsWithId)
documentsWithId documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure ids'
saveDocNgramsWith :: ( FlowCmdM env err m)
=> ListId
-> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs' let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
...@@ -287,10 +299,10 @@ insertMasterDocs c lang hs = do ...@@ -287,10 +299,10 @@ insertMasterDocs c lang hs = do
let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
-- new -- new
lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW' mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys) $ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs $ HashMap.toList mapNgramsDocs
-- insertDocNgrams -- insertDocNgrams
_return <- insertNodeNodeNgrams2 _return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId $ catMaybes [ NodeNodeNgrams2 <$> Just nId
...@@ -300,11 +312,11 @@ insertMasterDocs c lang hs = do ...@@ -300,11 +312,11 @@ insertMasterDocs c lang hs = do
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight , (nId, w) <- Map.toList mapNodeIdWeight
] ]
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed -- to be removed
_ <- insertDocNgrams lId indexedNgrams _ <- insertDocNgrams lId indexedNgrams
pure ids'
pure ()
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised -- TODO Type NodeDocumentUnicised
......
...@@ -138,8 +138,8 @@ type NodeSearchReadNull = ...@@ -138,8 +138,8 @@ type NodeSearchReadNull =
data NodePolySearch id data NodePolySearch id
typename typename
userId user_id
parentId parent_id
name name
date date
hyperdata hyperdata
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment