Commit 17fbec42 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] doc ngrams insertion (WIP)

parent 6507a2d2
Pipeline #1456 failed with stage
......@@ -27,18 +27,18 @@ import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types (RepoCmdM, Versioned(..), NgramsList, NgramsTerm(..))
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Prelude (GargServer, GargNoServer)
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams (insertNgrams)
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node
......@@ -102,11 +102,14 @@ post l m = do
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
reIndexWith :: CorpusId
reIndexWith :: ( HasRepo env
, FlowCmdM env err m
)
=> CorpusId
-> ListId
-> NgramsType
-> Set ListType
-> GargNoServer ()
-> m ()
reIndexWith cId lId nt lts = do
-- Getting [NgramsTerm]
ts <- List.concat
......@@ -125,20 +128,15 @@ reIndexWith cId lId nt lts = do
Just n -> if n == 1 then [t] else [ ]
) ts
-- Getting the Id of orphan ngrams
mapTextNgramsId <- insertNgrams (map (text2ngrams . unNgramsTerm) orphans)
printDebug "orphans" orphans
-- Get all documents of the corpus
docs <- selectDocNodes cId
printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc = List.concat
ngramsByDoc = HashMap.fromList
$ map (\(k,v) -> (SimpleNgrams (text2ngrams k), v))
$ List.concat
$ map (\doc -> List.zip
(termsInText (buildPatterns $ map (\k -> ([unNgramsTerm k], [])) orphans)
$ Text.unlines $ catMaybes
......@@ -152,12 +150,9 @@ reIndexWith cId lId nt lts = do
printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database
_ <- insertDocNgrams lId ( HashMap.fromList
$ catMaybes
$ map (\(t,d) -> (,) <$> toIndexedNgrams mapTextNgramsId t
<*> Just d ) ngramsByDoc
)
pure ()
_ <- saveDocNgramsWith lId ngramsByDoc
pure () -- ngramsByDoc
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n
......
......@@ -118,13 +118,13 @@ updateNode _uId nid1 (LinkNodeReq nt nid2) logStatus = do
, _scst_events = Just []
}
updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do
updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
logStatus JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 2
, _scst_events = Just []
}
corpusId <- view node_parent_id <$> getNode nId
corpusId <- view node_parent_id <$> getNode lId
logStatus JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
......@@ -133,7 +133,7 @@ updateNode _uId nId (UpdateNodeParamsList _mode) logStatus = do
}
_ <- case corpusId of
Just cId -> reIndexWith cId nId NgramsTerms (Set.singleton MapTerm)
Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3
......
......@@ -11,6 +11,7 @@ Portability : POSIX
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.API.Prelude
( module Gargantext.API.Prelude
......@@ -30,10 +31,6 @@ import Crypto.JOSE.Error as Jose
import Data.Aeson.Types
import Data.Typeable
import Data.Validity
import Servant
import Servant.Job.Async
import Servant.Job.Core (HasServerError(..), serverError)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Admin.Types
import Gargantext.API.Ngrams.Types
......@@ -42,6 +39,9 @@ import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node.Error (NodeError(..), HasNodeError(..))
import Gargantext.Database.Query.Tree
import Gargantext.Prelude
import Servant
import Servant.Job.Async
import Servant.Job.Core (HasServerError(..), serverError)
class HasJoseError e where
_JoseError :: Prism' e Jose.Error
......
......@@ -33,6 +33,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
, flowCorpus
, flowAnnuaire
, insertMasterDocs
, saveDocNgramsWith
, getOrMkRoot
, getOrMk_RootWithCorpus
......@@ -280,6 +281,17 @@ insertMasterDocs c lang hs = do
(extractNgramsT $ withLang lang documentsWithId)
documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure ids'
saveDocNgramsWith :: ( FlowCmdM env err m)
=> ListId
-> HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'
......@@ -287,10 +299,10 @@ insertMasterDocs c lang hs = do
let indexedNgrams = HashMap.mapKeys (indexNgrams terms2id) mapNgramsDocs
-- new
lId <- getOrMkList masterCorpusId masterUserId
mapCgramsId <- listInsertDb lId toNodeNgramsW'
$ map (first _ngramsTerms . second Map.keys)
$ HashMap.toList mapNgramsDocs
-- insertDocNgrams
_return <- insertNodeNodeNgrams2
$ catMaybes [ NodeNodeNgrams2 <$> Just nId
......@@ -300,11 +312,11 @@ insertMasterDocs c lang hs = do
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, w) <- Map.toList mapNodeIdWeight
]
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
-- to be removed
_ <- insertDocNgrams lId indexedNgrams
pure ids'
pure ()
------------------------------------------------------------------------
-- TODO Type NodeDocumentUnicised
......
......@@ -138,8 +138,8 @@ type NodeSearchReadNull =
data NodePolySearch id
typename
userId
parentId
user_id
parent_id
name
date
hyperdata
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment