[FLOW] Use `Map a (Map NgramsType b)` instead of `Map (NgramsT a) b`

parent 46fafbe8
Pipeline #126 canceled with stage
...@@ -37,7 +37,7 @@ import Gargantext.Text.Terms (extractTerms) ...@@ -37,7 +37,7 @@ import Gargantext.Text.Terms (extractTerms)
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Ngrams (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError) import Gargantext.Database.Schema.Node (mkRoot, mkCorpus, getOrMkList, mkGraph, mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams) import Gargantext.Database.Schema.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew) import Gargantext.Database.Schema.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
...@@ -51,7 +51,6 @@ import Gargantext.Prelude ...@@ -51,7 +51,6 @@ import Gargantext.Prelude
import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Parsers (parseDocs, FileFormat)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Set as DS
flowCorpus :: HasNodeError err => FileFormat -> FilePath -> CorpusName -> Cmd err CorpusId flowCorpus :: HasNodeError err => FileFormat -> FilePath -> CorpusName -> Cmd err CorpusId
...@@ -114,8 +113,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user ...@@ -114,8 +113,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
let maps = mapNodeIdNgrams docsWithNgrams let maps = mapNodeIdNgrams docsWithNgrams
-- printDebug "maps" (maps) -- printDebug "maps" (maps)
terms2id <- insertNgrams (DS.toList $ DS.map _ngramsT (DM.keysSet maps)) terms2id <- insertNgrams $ DM.keys maps
let indexedNgrams = DM.mapKeys (indexNgramsT terms2id) maps let indexedNgrams = DM.mapKeys (indexNgrams terms2id) maps
-- printDebug "inserted ngrams" indexedNgrams -- printDebug "inserted ngrams" indexedNgrams
_ <- insertToNodeNgrams indexedNgrams _ <- insertToNodeNgrams indexedNgrams
...@@ -226,27 +225,27 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList ...@@ -226,27 +225,27 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList
data DocumentIdWithNgrams = data DocumentIdWithNgrams =
DocumentIdWithNgrams DocumentIdWithNgrams
{ documentWithId :: !DocumentWithId { documentWithId :: !DocumentWithId
, document_ngrams :: !(Map (NgramsT Ngrams) Int) , document_ngrams :: !(Map Ngrams (Map NgramsType Int))
} deriving (Show) } deriving (Show)
-- TODO group terms -- TODO group terms
extractNgramsT :: HasNodeError err => HyperdataDocument -> Cmd err (Map (NgramsT Ngrams) Int) extractNgramsT :: HasNodeError err => HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT doc = do extractNgramsT doc = do
let source = text2ngrams $ maybe "Nothing" identity $ _hyperdataDocument_source doc let source = text2ngrams $ maybe "Nothing" identity $ _hyperdataDocument_source doc
let institutes = map text2ngrams $ maybe ["Nothing"] (map toSchoolName . (splitOn ", ")) $ _hyperdataDocument_institutes doc let institutes = map text2ngrams $ maybe ["Nothing"] (map toSchoolName . (splitOn ", ")) $ _hyperdataDocument_institutes doc
let authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc let authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
let leText = catMaybes [_hyperdataDocument_title doc, _hyperdataDocument_abstract doc] let leText = catMaybes [_hyperdataDocument_title doc, _hyperdataDocument_abstract doc]
terms' <- map text2ngrams <$> map (intercalate " " . _terms_label) <$> concat <$> liftIO (extractTerms (Multi EN) leText) terms' <- map text2ngrams <$> map (intercalate " " . _terms_label) <$> concat <$> liftIO (extractTerms (Multi EN) leText)
pure $ DM.fromList $ [(NgramsT Sources source, 1)] pure $ DM.fromList $ [(source, DM.singleton Sources 1)]
<> [(NgramsT Institutes i' , 1)| i' <- institutes ] <> [(i', DM.singleton Institutes 1) | i' <- institutes ]
<> [(NgramsT Authors a' , 1)| a' <- authors ] <> [(a', DM.singleton Authors 1) | a' <- authors ]
<> [(NgramsT NgramsTerms t' , 1)| t' <- terms' ] <> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ]
documentIdWithNgrams :: HasNodeError err => (HyperdataDocument -> Cmd err (Map (NgramsT Ngrams) Int)) documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int)))
-> [DocumentWithId] -> Cmd err [DocumentIdWithNgrams] -> [DocumentWithId] -> Cmd err [DocumentIdWithNgrams]
documentIdWithNgrams f = mapM toDocumentIdWithNgrams documentIdWithNgrams f = mapM toDocumentIdWithNgrams
where where
...@@ -255,14 +254,17 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams ...@@ -255,14 +254,17 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
pure $ DocumentIdWithNgrams d e pure $ DocumentIdWithNgrams d e
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) (Map NodeId Int) mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map Ngrams (Map NgramsType (Map NodeId Int))
mapNodeIdNgrams ds = DM.fromListWith (DM.unionWith (+)) xs mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
where
f :: DocumentIdWithNgrams -> Map Ngrams (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d
where where
xs = [(ng, DM.singleton nId i) | (nId, n2i') <- ds', (ng, i) <- DM.toList n2i'] nId = documentId $ documentWithId d
ds' = (\d -> ((documentId . documentWithId) d, document_ngrams d)) <$> ds
------------------------------------------------------------------------ ------------------------------------------------------------------------
flowList :: HasNodeError err => UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd err ListId flowList :: HasNodeError err => UserId -> CorpusId
-> Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err ListId
flowList uId cId ngs = do flowList uId cId ngs = do
-- printDebug "ngs:" ngs -- printDebug "ngs:" ngs
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
...@@ -303,9 +305,13 @@ insertGroups lId ngrs = ...@@ -303,9 +305,13 @@ insertGroups lId ngrs =
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO: verify NgramsT lost here ngrams2list :: Map NgramsIndexed (Map NgramsType a)
ngrams2list :: Map (NgramsT NgramsIndexed) (Map NodeId Int) -> [(ListType, (NgramsType,NgramsIndexed))] -> [(ListType, (NgramsType,NgramsIndexed))]
ngrams2list = zip (repeat GraphList) . map (\(NgramsT ngt ng) -> (ngt, ng)) . DM.keys ngrams2list m =
[ (GraphList, (t, ng))
| (ng, tm) <- DM.toList m
, t <- DM.keys tm
]
-- | TODO: weight of the list could be a probability -- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int
......
...@@ -37,7 +37,7 @@ import Gargantext.Database.Schema.Ngrams -- (NgramsType(..)) ...@@ -37,7 +37,7 @@ import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
import Gargantext.Database.Node.Contact import Gargantext.Database.Node.Contact
import Gargantext.Database.Flow.Utils import Gargantext.Database.Flow.Utils
import Gargantext.Database.Utils (Cmd, runPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Database.Types.Node (AnnuaireId, CorpusId, ContactId) import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
import Gargantext.Database.Node.Children import Gargantext.Database.Node.Children
import Gargantext.Core.Types (NodeType(..)) import Gargantext.Core.Types (NodeType(..))
...@@ -79,16 +79,16 @@ extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors ...@@ -79,16 +79,16 @@ extractNgramsT contact = fromList [(NgramsT Authors a' , 1)| a' <- authors
authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact] authors = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
--} --}
-- NP: notice how this function is no longer specific to the ContactId type
pairMaps :: Map (NgramsT Ngrams) (Map ContactId Int) pairMaps :: Map (NgramsT Ngrams) a
-> Map (NgramsT Ngrams) NgramsId -> Map (NgramsT Ngrams) NgramsId
-> Map (NgramsT NgramsIndexed) (Map ContactId Int) -> Map NgramsIndexed (Map NgramsType a)
pairMaps m1 m2 = DM.fromList $ catMaybes $ map (\(k,n) -> (,) <$> lookup' k m2 <*> Just n) $ DM.toList m1 pairMaps m1 m2 =
where DM.fromList
lookup' k@(NgramsT nt ng) m = case DM.lookup k m of [ (NgramsIndexed ng nId, DM.singleton nt n2i)
Nothing -> Nothing | (k@(NgramsT nt ng),n2i) <- DM.toList m1
Just nId -> Just $ NgramsT nt (NgramsIndexed ng nId) , Just nId <- [DM.lookup k m2]
]
----------------------------------------------------------------------- -----------------------------------------------------------------------
getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId) getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
......
...@@ -55,9 +55,10 @@ data DocumentIdWithNgrams a = ...@@ -55,9 +55,10 @@ data DocumentIdWithNgrams a =
} deriving (Show) } deriving (Show)
-- | TODO for now, list Type is CandidateList, why ? -- | TODO for now, list Type is CandidateList, why ?
insertToNodeNgrams :: Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd err Int insertToNodeNgrams :: Map NgramsIndexed (Map NgramsType (Map NodeId Int)) -> Cmd err Int
insertToNodeNgrams m = insertNodeNgrams [ NodeNgram nId ((_ngramsId . _ngramsT) ng) ((ngramsTypeId . _ngramsType) ng) (listTypeId CandidateList) (fromIntegral n) insertToNodeNgrams m = insertNodeNgrams [ NodeNgram n (_ngramsId ng) (ngramsTypeId t) (listTypeId CandidateList) (fromIntegral i)
| (ng, nId2int) <- DM.toList m | (ng, t2n2i) <- DM.toList m
, (nId, n) <- DM.toList nId2int , (t, n2i) <- DM.toList t2n2i
, (n, i) <- DM.toList n2i
] ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment