[ngrams] code fixes according to review

Related MR:
!378
parent bf89561b
Pipeline #7287 failed with stages
in 45 minutes and 56 seconds
......@@ -75,7 +75,6 @@ import_p = fmap CCMD_import $ ImportArgs
<*> ( option str ( long "user") )
<*> ( option str ( long "name") )
<*> settings_p
-- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
<*> ( option str ( long "corpus-path" <> help "Path to corpus file") )
function_p :: String -> Either String ImportFunction
......
......@@ -55,25 +55,21 @@ api userInviting nId (ShareTeamParams user') = do
pure u
Left _err -> do
username' <- getUsername userInviting
if username' `List.elem` arbitraryUsername
then do
-- printDebug "[G.A.N.Share.api]" ("Demo users are not allowed to invite" :: Text)
pure ()
else do
-- TODO better analysis of the composition of what is shared
children <- findNodesWithType nId [NodeList] [ NodeFolderShared
, NodeTeam
, NodeFolder
, NodeCorpus
]
_ <- if List.null children
then do
-- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
pure $ UnsafeMkUserId 0
else do
-- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
newUser user''
pure ()
unless (username' `List.elem` arbitraryUsername) $ do
-- TODO better analysis of the composition of what is shared
children <- findNodesWithType nId [NodeList] [ NodeFolderShared
, NodeTeam
, NodeFolder
, NodeCorpus
]
_ <- if List.null children
then do
-- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
pure $ UnsafeMkUserId 0
else do
-- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
newUser user''
pure ()
pure u
fromIntegral <$> DB.shareNodeWith (ShareNodeWith_User NodeFolderShared (UserName user)) nId
......
......@@ -389,12 +389,6 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
<- getOrMkRootWithCorpus MkCorpusUserMaster ctype
nlpServer <- view (nlpServerGet l)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
-- let gp = GroupParams { unGroupParams_lang = l
-- , unGroupParams_len = 10
-- , unGroupParams_limit = 10
-- , unGroupParams_stopSize = StopSize 10 }
let gp = GroupWithPosTag l nlpServer HashMap.empty
ngs <- buildNgramsLists user userCorpusId masterCorpusId mfslw gp
......@@ -424,7 +418,6 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs' :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
<- mapNodeIdNgrams
<$> documentIdWithNgrams
......@@ -432,10 +425,8 @@ insertMasterDocs ncs c lang hs = do
(map (B.first contextId2NodeId) documentsWithId)
lId <- getOrMkList masterCorpusId masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_ <- saveDocNgramsWith lId mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure $ map contextId2NodeId ids'
......@@ -444,9 +435,6 @@ saveDocNgramsWith :: (IsDBCmd env err m)
-> HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
-> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
-- let mapNgramsDocsNoCount :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(terms2id :: HashMap.HashMap Text NgramsId) <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
let mapNgramsDocs :: HashMap.HashMap Ngrams (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
......@@ -457,8 +445,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$ map (bimap _ngramsTerms Map.keys)
$ HashMap.toList mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let ngrams2insert = catMaybes [ ContextNodeNgrams2 (nodeId2ContextId nId)
<$> getCgramsId mapCgramsId ngrams_type (_ngramsTerms terms'')
<*> Just (fromIntegral $ unTermsWeight w :: Double)
......@@ -466,7 +452,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
, (ngrams_type, mapNodeIdWeight) <- Map.toList mapNgramsTypes
, (nId, (w, _cnt)) <- Map.toList mapNodeIdWeight
]
-- printDebug "Ngrams2Insert" ngrams2insert
_return <- insertContextNodeNgrams2 ngrams2insert
-- to be removed
......@@ -501,9 +486,6 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus
(docs :: [ContextOnlyId HyperdataDocument]) <- selectDocNodesOnlyId cId
let ngramsByDoc' = ngramsByDoc corpusLang nt ts docs
-- Saving the indexation in database
mapM_ (saveDocNgramsWith lId) ngramsByDoc'
pure ()
mapM_ (saveDocNgramsWith lId . ngramsByDoc corpusLang nt ts) docs
......@@ -185,23 +185,14 @@ toInserted =
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> [ContextOnlyId HyperdataDocument]
-> [HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))]
ngramsByDoc l nt ts docs =
ngramsByDoc' l nt ts <$> docs
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc' :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> ContextOnlyId HyperdataDocument
-> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc' l nt ts doc =
-> ContextOnlyId HyperdataDocument
-> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc l nt ts doc =
HashMap.map (\cnt -> DM.singleton nt $ DM.singleton nId (1, cnt)) extractedMap
where
matched :: [(MatchedText, TermsCount)]
......
......@@ -54,6 +54,8 @@ $(makeLensesWith abbreviatedFields ''ContextPoly)
------------------------------------------------------------------------
-- | This datatype describes queries in the `contexts` table, where
-- only `id` and `hyperdata` are fetched.
data ContextPolyOnlyId id hyperdata =
ContextOnlyId { _context_oid_id :: !id
, _context_oid_hyperdata :: !hyperdata }
......
......@@ -142,18 +142,16 @@ testNgramsByDoc01 = do
, _hd_abstract = Nothing }
let ctx2 = ContextOnlyId 2 hd2
ngramsByDoc EN NgramsTerms terms [ctx1, ctx2] @?=
[ HashMap.fromList
ngramsByDoc EN NgramsTerms terms ctx1 @?=
HashMap.fromList
[ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "hello", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) )
, ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 1) (1, 1) )
]
, HashMap.fromList
ngramsByDoc EN NgramsTerms terms ctx2 @?=
HashMap.fromList
[ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 2) (1, 2) )
]
]
ngramsByDoc EN NgramsTerms terms [ctx1, ctx2] @?=
(ngramsByDoc EN NgramsTerms terms [ctx1]) <> (ngramsByDoc EN NgramsTerms terms [ctx2])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment