diff --git a/bin/gargantext-import/Main.hs b/bin/gargantext-import/Main.hs index e3974fdb1f383c37a5c29771f07e46641359fbeb..53a2fadf400022da8f87b57546f00c3f82220ddf 100644 --- a/bin/gargantext-import/Main.hs +++ b/bin/gargantext-import/Main.hs @@ -40,7 +40,7 @@ main = do -} let cmdCorpus :: forall m. FlowCmdM DevEnv ServantErr m => m CorpusId - cmdCorpus = flowCorpus (cs user) CsvHalFormat corpusPath (cs name) + cmdCorpus = flowCorpus (cs user) (cs name) CsvHalFormat corpusPath -- cmd = {-createUsers >>-} cmdCorpus diff --git a/src/Gargantext/Database/Flow.hs b/src/Gargantext/Database/Flow.hs index 053d5be6254d438ac6f67dbc94d65001f58ca51a..1fc60628f723cf66d54548990fe65028ec0b465c 100644 --- a/src/Gargantext/Database/Flow.hs +++ b/src/Gargantext/Database/Flow.hs @@ -46,6 +46,7 @@ import Gargantext.Core (Lang(..)) import Gargantext.Core.Types (NodePoly(..), Terms(..)) import Gargantext.Core.Types.Individu (Username) import Gargantext.Core.Types.Main +import Gargantext.Database.TextSearch (searchInDatabase) import Gargantext.Database.Config (userMaster, corpusMasterName) import Gargantext.Database.Flow.Utils (insertToNodeNgrams) import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) @@ -61,6 +62,7 @@ import Gargantext.Text.List import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Terms (TermType(..)) import Gargantext.Text.Terms (extractTerms) +import Gargantext.Text.Terms.Mono.Stem.En (stemIt) import Servant (ServantErr) import System.FilePath (FilePath) import qualified Data.Map as DM @@ -76,8 +78,20 @@ type FlowCmdM env err m = flowCorpus :: FlowCmdM env ServantErr m - => Username -> FileFormat -> FilePath -> CorpusName -> m CorpusId -flowCorpus userName ff fp corpusName = do + => Username -> CorpusName -> FileFormat -> FilePath -> m CorpusId +flowCorpus u cn ff fp = do + ids <- flowCorpusMaster ff fp + flowCorpusUser u cn ids + +flowCorpusSearchInDatabase :: FlowCmdM env ServantErr m + => Username -> CorpusName -> Text -> m CorpusId +flowCorpusSearchInDatabase u cn q = do + ids <- chunkAlong 10000 10000 <$> map fst <$> searchInDatabase 2 (stemIt q) + flowCorpusUser u cn ids + + +flowCorpusMaster :: FlowCmdM env ServantErr m => FileFormat -> FilePath -> m [[NodeId]] +flowCorpusMaster ff fp = do -- Master Flow docs <- map addUniqIdsDoc <$> liftIO (parseDocs ff fp) @@ -90,7 +104,11 @@ flowCorpus userName ff fp corpusName = do -- TODO: chunkAlongNoRest or chunkAlongWithRest -- default behavior: NoRest ids <- mapM insertMasterDocs $ chunkAlong 10000 10000 docs + pure ids + +flowCorpusUser :: FlowCmdM env ServantErr m => Username -> CorpusName -> [[NodeId]] -> m CorpusId +flowCorpusUser userName corpusName ids = do -- User Flow (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName -- TODO: check if present already, ignore diff --git a/src/Gargantext/Text/Flow.hs b/src/Gargantext/Text/Flow.hs index 66e7828fef8902ab1785eed03d75493b9bed351a..d62a8748d44503f9c613120f538a723d9fa81150 100644 --- a/src/Gargantext/Text/Flow.hs +++ b/src/Gargantext/Text/Flow.hs @@ -36,7 +36,7 @@ import Gargantext.Core.Types (CorpusId) --import Gargantext.Database.Types.Node import Gargantext.Prelude --import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) ---import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..)) +import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..)) --import Gargantext.Text.Metrics.Count (coocOn) --import Gargantext.Text.Parsers.CSV --import Gargantext.Text.Terms (TermType, extractTerms) @@ -120,21 +120,21 @@ cooc2graph :: (Map (Text, Text) Int) -> IO Graph cooc2graph myCooc = do --printDebug "myCooc" myCooc -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores -{- let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 ) + let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 ) (InclusionSize 500 ) (SampleBins 10 ) (Clusters 3 ) (DefaultValue 0 ) ) myCooc --} --printDebug "myCooc3 size" $ M.size myCooc3 + --printDebug "myCooc3 size" $ M.size myCooc3 --printDebug "myCooc3" myCooc3 -- Cooc -> Matrix - let (ti, _) = createIndices myCooc + let (ti, _) = createIndices myCooc3 --printDebug "ti size" $ M.size ti --printDebug "ti" ti - let myCooc4 = toIndex ti myCooc + let myCooc4 = toIndex ti myCooc3 --printDebug "myCooc4 size" $ M.size myCooc4 --printDebug "myCooc4" myCooc4