Commit d35f64fd authored by Alfredo Di Napoli

Generalise index in DocumentIdWithNgrams

parent f280b7e1
Pipeline #7619 passed with stages
in 53 minutes and 17 seconds
......@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a
-- | A document paired with the ngrams attached to it.
--
-- Type parameters: @ix@ is the index type carried by the 'Indexed'
-- wrapper, @a@ is the document payload and @b@ is the key type of the
-- ngrams map.
--
-- NOTE(review): this excerpt interleaves the pre-change lines (no @ix@
-- parameter, 'NodeId' index) with the post-change lines (@ix@ index).
-- Presumably only the @ix@ variants remain after the commit; confirm
-- against the repository.
data DocumentIdWithNgrams a b =
data DocumentIdWithNgrams ix a b =
DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a
{ documentWithId :: Indexed ix a
-- ^ The document wrapped together with its index.
, documentNgrams :: HashMap b (Map NgramsType TermsWeight, TermsCount)
-- ^ For each ngrams key: a weight per 'NgramsType', plus one 'TermsCount'
-- shared by all of that key's types.
} deriving (Show)
......
......@@ -87,8 +87,8 @@ docNgrams lang ts doc =
-- | Builds a 'DocumentIdWithNgrams' for a single indexed document.
--
-- Per the signature, it takes a monadic function @f@ from a document
-- payload to its ngrams map, and an 'Indexed' document, and yields the
-- result in the same monad @m@.
--
-- NOTE(review): the body of the @do@ block is cut off in this excerpt, so
-- exactly how @f@ is applied cannot be confirmed from here.
documentIdWithNgrams :: Monad m
=> ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> Indexed NodeId a
-> m (DocumentIdWithNgrams a b)
-> Indexed ix a
-> m (DocumentIdWithNgrams ix a b)
documentIdWithNgrams f = toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
......@@ -97,18 +97,18 @@ documentIdWithNgrams f = toDocumentIdWithNgrams
-- | List version of 'documentIdWithNgrams'.
--
-- Applies @documentIdWithNgrams f@ to every indexed document via 'mapM',
-- so the monadic effects run in list order and the results come back as
-- a list of the same length.
mapDocumentIdWithNgrams :: Monad m
=> ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> [Indexed NodeId a]
-> m [DocumentIdWithNgrams a b]
-> [Indexed ix a]
-> m [DocumentIdWithNgrams ix a b]
mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f)
-- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
--
-- Each document is first converted by the local helper @f@ into a nested
-- map (ngrams key -> 'NgramsType' -> document index -> weight and count).
-- The per-document maps are then merged with 'HashMap.unionsWith'.
-- When the same key, type and index occur more than once, the
-- @(TermsWeight, TermsCount)@ pairs are combined with @addTuples@
-- (defined outside this excerpt).
--
-- TODO check optimization
mkNodeIdNgramsMap :: (Ord b, Hashable b)
=> [DocumentIdWithNgrams a b]
mkNodeIdNgramsMap :: forall ix a b. (Ord b, Hashable b, Ord ix)
=> [DocumentIdWithNgrams ix a b]
-> HashMap.HashMap b
(Map NgramsType
(Map NodeId (TermsWeight, TermsCount))
(Map ix (TermsWeight, TermsCount))
)
mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where
......@@ -116,8 +116,8 @@ mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) .
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the term appears in a
-- document) is copied over to all its types.
--
-- NOTE(review): the opening of the comment above falls in a gap between
-- diff hunks and is not visible in this excerpt.
--
-- Converts one document into the nested map shape that
-- 'mkNodeIdNgramsMap' merges: for every ngrams key and every
-- 'NgramsType' of that key, a singleton map from the document's index to
-- the pair (weight for that type, count for the key).
f :: DocumentIdWithNgrams a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
f :: DocumentIdWithNgrams ix a b
-> HashMap.HashMap b (Map NgramsType (Map ix (TermsWeight, TermsCount)))
-- The outer fmap walks the ngrams keys; the inner fmap walks each key's
-- per-type weights and pairs every weight with the key's single count.
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d
where
-- Index of the document, used as the key of the innermost map.
nId = _index $ documentWithId d
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment