Commit f57909bb authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Cosmetics] before new flow.

parent 93bf6e56
...@@ -41,7 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams) ...@@ -41,7 +41,7 @@ import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Metrics.TFICF (getTficf) import Gargantext.Database.Metrics.TFICF (getTficf)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..)) import Gargantext.Text.Metrics.TFICF (Tficf(..))
--import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId) import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot) import Gargantext.Database.Root (getRoot)
...@@ -71,7 +71,6 @@ type FlowCmdM env err m = ...@@ -71,7 +71,6 @@ type FlowCmdM env err m =
, HasRepoVar env , HasRepoVar env
) )
flowCorpus :: FlowCmdM env ServantErr m flowCorpus :: FlowCmdM env ServantErr m
=> FileFormat -> FilePath -> CorpusName -> m CorpusId => FileFormat -> FilePath -> CorpusName -> m CorpusId
flowCorpus ff fp cName = do flowCorpus ff fp cName = do
...@@ -106,7 +105,12 @@ flowCorpus' :: FlowCmdM env err m ...@@ -106,7 +105,12 @@ flowCorpus' :: FlowCmdM env err m
=> NodeType -> [HyperdataDocument] => NodeType -> [HyperdataDocument]
-> ([ReturnId], UserId, CorpusId, UserId, CorpusId) -> ([ReturnId], UserId, CorpusId, UserId, CorpusId)
-> m CorpusId -> m CorpusId
flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, userId,userCorpusId) = do flowCorpus' NodeCorpus
hyperdataDocuments
( ids
, masterUserId
, masterCorpusId
, userId,userCorpusId) = do
-------------------------------------------------- --------------------------------------------------
let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments) let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
...@@ -124,8 +128,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user ...@@ -124,8 +128,8 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
-- List Ngrams Flow -- List Ngrams Flow
-- get elements -- get elements
-- filter by TFICF -- filter by TFICF
let ngs = ngrams2list' indexedNgrams --let ngs = ngrams2list' indexedNgrams
--let ngs = getNgramsElementsWithParentNodeId masterCorpusId ngs <- getNgramsElementsWithParentNodeId masterCorpusId
_masterListId <- flowList masterUserId masterCorpusId ngs _masterListId <- flowList masterUserId masterCorpusId ngs
_userListId <- flowListUser userId userCorpusId ngs 100 _userListId <- flowListUser userId userCorpusId ngs 100
-------------------------------------------------- --------------------------------------------------
...@@ -138,13 +142,21 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user ...@@ -138,13 +142,21 @@ flowCorpus' NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, user
pure userCorpusId pure userCorpusId
-- del [corpusId2, corpusId] -- del [corpusId2, corpusId]
flowCorpus' NodeAnnuaire _hyperdataDocuments (_ids,_masterUserId,_masterCorpusId,_userId,_userCorpusId) = undefined flowCorpus' NodeAnnuaire
_hyperdataDocuments
( _ids
, _masterUserId
, _masterCorpusId
, _userId
, _userCorpusId) = undefined
flowCorpus' _ _ _ = undefined flowCorpus' _ _ _ = undefined
type CorpusName = Text type CorpusName = Text
subFlowCorpus :: HasNodeError err => Username -> CorpusName -> Cmd err (UserId, RootId, CorpusId) subFlowCorpus :: HasNodeError err
=> Username -> CorpusName
-> Cmd err (UserId, RootId, CorpusId)
subFlowCorpus username cName = do subFlowCorpus username cName = do
maybeUserId <- getUser username maybeUserId <- getUser username
userId <- case maybeUserId of userId <- case maybeUserId of
...@@ -187,7 +199,7 @@ subFlowCorpus username cName = do ...@@ -187,7 +199,7 @@ subFlowCorpus username cName = do
toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d)) toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
where where
err = "Database.Flow.toInsert" err = "[ERROR] Database.Flow.toInsert"
toInserted :: [ReturnId] -> Map HashId ReturnId toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted = DM.fromList . map (\r -> (reUniqId r, r) ) toInserted = DM.fromList . map (\r -> (reUniqId r, r) )
...@@ -198,7 +210,9 @@ data DocumentWithId = ...@@ -198,7 +210,9 @@ data DocumentWithId =
, documentData :: !HyperdataDocument , documentData :: !HyperdataDocument
} deriving (Show) } deriving (Show)
mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId] mergeData :: Map HashId ReturnId
-> Map HashId HyperdataDocument
-> [DocumentWithId]
mergeData rs = catMaybes . map toDocumentWithId . DM.toList mergeData rs = catMaybes . map toDocumentWithId . DM.toList
where where
toDocumentWithId (hash,hpd) = toDocumentWithId (hash,hpd) =
...@@ -213,13 +227,30 @@ data DocumentIdWithNgrams = ...@@ -213,13 +227,30 @@ data DocumentIdWithNgrams =
} deriving (Show) } deriving (Show)
-- TODO group terms -- TODO group terms
extractNgramsT :: HasNodeError err => HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int)) extractNgramsT :: HasNodeError err
=> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT doc = do extractNgramsT doc = do
let source = text2ngrams $ maybe "Nothing" identity $ _hyperdataDocument_source doc let source = text2ngrams
let institutes = map text2ngrams $ maybe ["Nothing"] (map toSchoolName . (splitOn ", ")) $ _hyperdataDocument_institutes doc $ maybe "Nothing" identity
let authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc $ _hyperdataDocument_source doc
let leText = catMaybes [_hyperdataDocument_title doc, _hyperdataDocument_abstract doc]
terms' <- map text2ngrams <$> map (intercalate " " . _terms_label) <$> concat <$> liftIO (extractTerms (Multi EN) leText) institutes = map text2ngrams
$ maybe ["Nothing"] (map toSchoolName . (splitOn ", "))
$ _hyperdataDocument_institutes doc
authors = map text2ngrams
$ maybe ["Nothing"] (splitOn ", ")
$ _hyperdataDocument_authors doc
leText = catMaybes [ _hyperdataDocument_title doc
, _hyperdataDocument_abstract doc
]
terms' <- map text2ngrams
<$> map (intercalate " " . _terms_label)
<$> concat
<$> liftIO (extractTerms (Multi EN) leText)
pure $ DM.fromList $ [(source, DM.singleton Sources 1)] pure $ DM.fromList $ [(source, DM.singleton Sources 1)]
<> [(i', DM.singleton Institutes 1) | i' <- institutes ] <> [(i', DM.singleton Institutes 1) | i' <- institutes ]
...@@ -229,8 +260,10 @@ extractNgramsT doc = do ...@@ -229,8 +260,10 @@ extractNgramsT doc = do
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument -> Cmd err (Map Ngrams (Map NgramsType Int))) => (HyperdataDocument
-> [DocumentWithId] -> Cmd err [DocumentIdWithNgrams] -> Cmd err (Map Ngrams (Map NgramsType Int)))
-> [DocumentWithId]
-> Cmd err [DocumentIdWithNgrams]
documentIdWithNgrams f = mapM toDocumentIdWithNgrams documentIdWithNgrams f = mapM toDocumentIdWithNgrams
where where
toDocumentIdWithNgrams d = do toDocumentIdWithNgrams d = do
...@@ -238,10 +271,12 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams ...@@ -238,10 +271,12 @@ documentIdWithNgrams f = mapM toDocumentIdWithNgrams
pure $ DocumentIdWithNgrams d e pure $ DocumentIdWithNgrams d e
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map Ngrams (Map NgramsType (Map NodeId Int)) mapNodeIdNgrams :: [DocumentIdWithNgrams]
-> Map Ngrams (Map NgramsType (Map NodeId Int))
mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f mapNodeIdNgrams = DM.unionsWith (DM.unionWith (DM.unionWith (+))) . fmap f
where where
f :: DocumentIdWithNgrams -> Map Ngrams (Map NgramsType (Map NodeId Int)) f :: DocumentIdWithNgrams
-> Map Ngrams (Map NgramsType (Map NodeId Int))
f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d f d = fmap (fmap (DM.singleton nId)) $ document_ngrams d
where where
nId = documentId $ documentWithId d nId = documentId $ documentWithId d
...@@ -271,7 +306,10 @@ flowList uId cId ngs = do ...@@ -271,7 +306,10 @@ flowList uId cId ngs = do
pure lId pure lId
flowListUser :: FlowCmdM env err m flowListUser :: FlowCmdM env err m
=> UserId -> CorpusId -> Map NgramsType [NgramsElement] -> Int -> m ListId => UserId -> CorpusId
-> Map NgramsType [NgramsElement]
-> Int
-> m ListId
flowListUser uId cId ngsM n = do flowListUser uId cId ngsM n = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
...@@ -301,7 +339,10 @@ groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.k ...@@ -301,7 +339,10 @@ groupNgramsBy isEqual cId = DM.fromList $ catMaybes [ isEqual n1 n2 | n1 <- DM.k
-- TODO check: do not insert duplicates -- TODO check: do not insert duplicates
insertGroups :: HasNodeError err => ListId -> Map NgramsIndexed NgramsIndexed -> Cmd err Int insertGroups :: HasNodeError err
=> ListId
-> Map NgramsIndexed NgramsIndexed
-> Cmd err Int
insertGroups lId ngrs = insertGroups lId ngrs =
insertNodeNgramsNgramsNew [ NodeNgramsNgrams lId ng1 ng2 (Just 1) insertNodeNgramsNgramsNew [ NodeNgramsNgrams lId ng1 ng2 (Just 1)
| (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs | (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs
...@@ -329,7 +370,10 @@ ngrams2list' m = fromListWith (<>) ...@@ -329,7 +370,10 @@ ngrams2list' m = fromListWith (<>)
-- | TODO: weight of the list could be a probability -- | TODO: weight of the list could be a probability
insertLists :: HasNodeError err => ListId -> [(ListType, (NgramsType, NgramsIndexed))] -> Cmd err Int insertLists :: HasNodeError err
=> ListId
-> [(ListType, (NgramsType, NgramsIndexed))]
-> Cmd err Int
insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1 insertLists lId lngs = insertNodeNgrams [ NodeNgram lId (_ngramsId ng) Nothing (ngramsTypeId ngt) (fromIntegral $ listTypeId l) 1
| (l,(ngt, ng)) <- lngs | (l,(ngt, ng)) <- lngs
] ]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment