[ngrams] some more simplification of ngramsByDoc'

parent ab7c1766
Pipeline #7173 failed with stages
in 14 minutes and 21 seconds
......@@ -197,22 +197,38 @@ ngramsByDoc l nt ts docs =
ngramsByDoc' l nt ts <$> docs
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights
-- terms count and weights. Notice that the weight is always 1 here.
-- ngramsByDoc' :: Lang
-- -> NgramsType
-- -> [NT.NgramsTerm]
-- -> ContextOnlyId HyperdataDocument
-- -> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (Int, TermsCount)))
-- ngramsByDoc' l nt ts doc =
-- HashMap.fromListWith (DM.unionWith (DM.unionWith (\(_a,b) (_a',b') -> (1,b+b')))) withExtractedNgrams
-- where
-- _docNgrams' :: ([(MatchedText, TermsCount)], NodeId)
-- _docNgrams'@(matched, nId) = (docNgrams l ts doc, doc ^. context_oid_id)
-- withExtractedNgrams :: [(ExtractedNgrams, Map NgramsType (Map NodeId (Int, TermsCount)))]
-- withExtractedNgrams =
-- map (\(matchedText, cnt) ->
-- ( SimpleNgrams (text2ngrams matchedText)
-- , DM.singleton nt $ DM.singleton nId (1, cnt) ) ) matched
-- NOTE(review): the lines below appear to interleave two variants of this
-- function (as rendered by a diff view): there are two body expressions, two
-- bindings of the (docNgrams ..., context_oid_id) pair, and two signatures for
-- 'withExtractedNgrams'. Confirm which variant is current before compiling.
--
-- Both variants build, for a single document, a map from each extracted ngram
-- to @nt -> nId -> (1, count)@, where the first tuple component is the
-- constant 1 and the second is the term count for that ngram.
ngramsByDoc' :: Lang
-> NgramsType
-> [NT.NgramsTerm]
-> ContextOnlyId HyperdataDocument
-> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (Int, TermsCount)))
ngramsByDoc' l nt ts doc =
-- Variant A: merge already-wrapped entries; on a key collision the first
-- component is reset to 1 and the two counts are added.
HashMap.fromListWith (DM.unionWith (DM.unionWith (\(_a,b) (_a',b') -> (1,b+b')))) withExtractedNgrams
-- Variant B: counts are summed per ngram first (see 'extractedMap'), then each
-- total is wrapped as nt -> nId -> (1, cnt).
HashMap.map (\cnt -> DM.singleton nt $ DM.singleton nId (1, cnt)) extractedMap
where
-- Pair of (matched terms with their counts, id of the document), obtained
-- from 'docNgrams' and the document's 'context_oid_id'.
docNgrams' :: ([(MatchedText, TermsCount)], NodeId)
docNgrams' = (docNgrams l ts doc, doc ^. context_oid_id)
-- Same pair as above, bound with an as-pattern that also destructures it.
_docNgrams' :: ([(MatchedText, TermsCount)], NodeId)
_docNgrams'@(matched, nId) = (docNgrams l ts doc, doc ^. context_oid_id)
(matched, nId) = docNgrams'
-- Variant B: rewrap the matched text of each pair as 'SimpleNgrams', leaving
-- the count untouched.
withExtractedNgrams :: [(ExtractedNgrams, TermsCount)]
withExtractedNgrams = first (SimpleNgrams . text2ngrams) <$> matched
-- Variant A: rewrap the matched text and immediately nest the count under
-- nt -> nId with the constant 1 as first component.
withExtractedNgrams :: [(ExtractedNgrams, Map NgramsType (Map NodeId (Int, TermsCount)))]
withExtractedNgrams =
map (\(matchedText, cnt) ->
( SimpleNgrams (text2ngrams matchedText)
, DM.singleton nt $ DM.singleton nId (1, cnt) ) ) matched
-- Entries sharing the same extracted ngram are collapsed by adding their counts.
extractedMap :: HashMap.HashMap ExtractedNgrams TermsCount
extractedMap = HashMap.fromListWith (+) withExtractedNgrams
......@@ -138,7 +138,7 @@ testNgramsByDoc01 = do
let hd1 = emptyHyperdataDocument { _hd_title = Just "hello world, kaboom"
, _hd_abstract = Nothing }
let ctx1 = ContextOnlyId 1 hd1
let hd2 = emptyHyperdataDocument { _hd_title = Just "world, boom"
let hd2 = emptyHyperdataDocument { _hd_title = Just "world, boom world"
, _hd_abstract = Nothing }
let ctx2 = ContextOnlyId 2 hd2
......@@ -151,7 +151,7 @@ testNgramsByDoc01 = do
]
, HashMap.fromList
[ ( SimpleNgrams $ UnsafeNgrams { _ngramsTerms = "world", _ngramsSize = 1 }
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 2) (1, 1) )
, Map.singleton NgramsTerms $ Map.singleton (UnsafeMkNodeId 2) (1, 2) )
]
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment