[ngrams] code fixes according to review

Related MR:
!378
parent bf89561b
Pipeline #7287 failed with stages
in 45 minutes and 56 seconds
...@@ -75,7 +75,6 @@ import_p = fmap CCMD_import $ ImportArgs ...@@ -75,7 +75,6 @@ import_p = fmap CCMD_import $ ImportArgs
<*> ( option str ( long "user") ) <*> ( option str ( long "user") )
<*> ( option str ( long "name") ) <*> ( option str ( long "name") )
<*> settings_p <*> settings_p
-- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
<*> ( option str ( long "corpus-path" <> help "Path to corpus file") ) <*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
function_p :: String -> Either String ImportFunction function_p :: String -> Either String ImportFunction
......
...@@ -55,25 +55,21 @@ api userInviting nId (ShareTeamParams user') = do ...@@ -55,25 +55,21 @@ api userInviting nId (ShareTeamParams user') = do
pure u pure u
Left _err -> do Left _err -> do
username' <- getUsername userInviting username' <- getUsername userInviting
if username' `List.elem` arbitraryUsername unless (username' `List.elem` arbitraryUsername) $ do
then do -- TODO better analysis of the composition of what is shared
-- printDebug "[G.A.N.Share.api]" ("Demo users are not allowed to invite" :: Text) children <- findNodesWithType nId [NodeList] [ NodeFolderShared
pure () , NodeTeam
else do , NodeFolder
-- TODO better analysis of the composition of what is shared , NodeCorpus
children <- findNodesWithType nId [NodeList] [ NodeFolderShared ]
, NodeTeam _ <- if List.null children
, NodeFolder then do
, NodeCorpus -- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
] pure $ UnsafeMkUserId 0
_ <- if List.null children else do
then do -- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
-- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text) newUser user''
pure $ UnsafeMkUserId 0 pure ()
else do
-- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
newUser user''
pure ()
pure u pure u
fromIntegral <$> DB.shareNodeWith (ShareNodeWith_User NodeFolderShared (UserName user)) nId fromIntegral <$> DB.shareNodeWith (ShareNodeWith_User NodeFolderShared (UserName user)) nId
......
...@@ -389,12 +389,6 @@ buildSocialList l user userCorpusId listId ctype mfslw = do ...@@ -389,12 +389,6 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
<- getOrMkRootWithCorpus MkCorpusUserMaster ctype <- getOrMkRootWithCorpus MkCorpusUserMaster ctype
nlpServer <- view (nlpServerGet l) nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
-- let gp = GroupParams { unGroupParams_lang = l
-- , unGroupParams_len = 10
-- , unGroupParams_limit = 10
-- , unGroupParams_stopSize = StopSize 10 }
let gp = GroupWithPosTag l nlpServer HashMap.empty let gp = GroupWithPosTag l nlpServer HashMap.empty
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
...@@ -424,7 +418,6 @@ insertMasterDocs ncs c lang hs = do ...@@ -424,7 +418,6 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link) -- add documents to the corpus (create node_node link)
-- this will enable global database monitoring -- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams <- mapNodeIdNgrams
<$> documentIdWithNgrams <$> documentIdWithNgrams
...@@ -432,10 +425,8 @@ insertMasterDocs ncs c lang hs = do ...@@ -432,10 +425,8 @@ insertMasterDocs ncs c lang hs = do
(map (B.first contextId2NodeId) documentsWithId) (map (B.first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs' _ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids' pure $ map contextId2NodeId ids'
...@@ -444,9 +435,6 @@ saveDocNgramsWith :: (IsDBCmd env err m) ...@@ -444,9 +435,6 @@ saveDocNgramsWith :: (IsDBCmd env err m)
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) -> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
-> m () -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do saveDocNgramsWith lId mapNgramsDocs' = do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
-- let mapNgramsDocsNoCount :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(terms2id :: HashMap.HashMap Text NgramsId) <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs' (terms2id :: HashMap.HashMap Text NgramsId) <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs :: HashMap.HashMap Ngrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) let mapNgramsDocs :: HashMap.HashMap Ngrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
...@@ -457,8 +445,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -457,8 +445,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (bimap _ngramsTerms Map.keys) $ map (bimap _ngramsTerms Map.keys)
$ HashMap.toList mapNgramsDocs $ HashMap.toList mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let ngrams2insert = catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId) let ngrams2insert = catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
<$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'') <$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral $ unTermsWeight w :: Double) <*> Just (fromIntegral $ unTermsWeight w :: Double)
...@@ -466,7 +452,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do ...@@ -466,7 +452,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes , (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight , (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
] ]
-- printDebug "Ngrams2Insert" ngrams2insert
_return <- insertContextNodeNgrams2 ngrams2insert _return <- insertContextNodeNgrams2 ngrams2insert
-- to be removed -- to be removed
...@@ -501,9 +486,6 @@ reIndexWith cId lId nt lts = do ...@@ -501,9 +486,6 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus -- Get all documents of the corpus
(docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId (docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId
let ngramsByDoc' = ngramsByDoc corpusLang nt ts docs
-- Saving the indexation in database -- Saving the indexation in database
mapM_ (saveDocNgramsWith lId) ngramsByDoc' mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
pure ()
...@@ -185,23 +185,14 @@ toInserted = ...@@ -185,23 +185,14 @@ toInserted =
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc :: Lang ngramsByDoc :: Lang
-> NgramsType -> NgramsType
-> [NT.NgramsTerm] -> [NT.NgramsTerm]
-> [ContextOnlyId HyperdataDocument] -> ContextOnlyId HyperdataDocument
-> [HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))] -> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc l nt ts docs = ngramsByDoc l nt ts doc =
ngramsByDoc' l nt ts <$> docs
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc' :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> ContextOnlyId HyperdataDocument
-> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc' l nt ts doc =
HashMap.map (\cnt -> DM.singleton nt $ DM.singleton nId (1, cnt)) extractedMap HashMap.map (\cnt -> DM.singleton nt $ DM.singleton nId (1, cnt)) extractedMap
where where
matched :: [(MatchedText, TermsCount)] matched :: [(MatchedText, TermsCount)]
......
...@@ -54,6 +54,8 @@ $(makeLensesWith abbreviatedFields ''ContextPoly) ...@@ -54,6 +54,8 @@ $(makeLensesWith abbreviatedFields ''ContextPoly)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | This datatype describes queries in the `contexts` table, where
-- only `id` and `hyperdata` are fetched.
data ContextPolyOnlyId id hyperdata = data ContextPolyOnlyId id hyperdata =
ContextOnlyId { _context_oid_id :: !id ContextOnlyId { _context_oid_id :: !id
, _context_oid_hyperdata :: !hyperdata } , _context_oid_hyperdata :: !hyperdata }
......
...@@ -142,18 +142,16 @@ testNgramsByDoc01 = do ...@@ -142,18 +142,16 @@ testNgramsByDoc01 = do
, _hd_abstract = Nothing } , _hd_abstract = Nothing }
let ctx2 = ContextOnlyId 2 hd2 let ctx2 = ContextOnlyId 2 hd2
ngramsByDoc EN NgramsTerms terms [ctx1, ctx2] @?= ngramsByDoc EN NgramsTerms terms ctx1 @?=
[ HashMap.fromList HashMap.fromList
[ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "hello", _ngramsSize = 1 } [ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "hello", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) ) , Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) )
, ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 } , ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) ) , Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) )
] ]
, HashMap.fromList
ngramsByDoc EN NgramsTerms terms ctx2 @?=
HashMap.fromList
[ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 } [ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 2) (1, 2) ) , Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 2) (1, 2) )
] ]
]
ngramsByDoc EN NgramsTerms terms [ctx1, ctx2] @?=
(ngramsByDoc EN NgramsTerms terms [ctx1]) <> (ngramsByDoc EN NgramsTerms terms [ctx2])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment