Commit d35f64fd authored by Alfredo Di Napoli

Generalise index in DocumentIdWithNgrams

parent f280b7e1
Pipeline #7619 passed with stages
in 53 minutes and 17 seconds
...@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a ...@@ -66,9 +66,9 @@ type FlowInsertDB a = ( AddUniqId a
data DocumentIdWithNgrams a b = data DocumentIdWithNgrams ix a b =
DocumentIdWithNgrams DocumentIdWithNgrams
{ documentWithId :: Indexed NodeId a { documentWithId :: Indexed ix a
, documentNgrams :: HashMap b (Map NgramsType TermsWeight, TermsCount) , documentNgrams :: HashMap b (Map NgramsType TermsWeight, TermsCount)
} deriving (Show) } deriving (Show)
......
...@@ -87,8 +87,8 @@ docNgrams lang ts doc = ...@@ -87,8 +87,8 @@ docNgrams lang ts doc =
documentIdWithNgrams :: Monad m documentIdWithNgrams :: Monad m
=> ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) ) => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> Indexed NodeId a -> Indexed ix a
-> m (DocumentIdWithNgrams a b) -> m (DocumentIdWithNgrams ix a b)
documentIdWithNgrams f = toDocumentIdWithNgrams documentIdWithNgrams f = toDocumentIdWithNgrams
where where
toDocumentIdWithNgrams d = do toDocumentIdWithNgrams d = do
...@@ -97,18 +97,18 @@ documentIdWithNgrams f = toDocumentIdWithNgrams ...@@ -97,18 +97,18 @@ documentIdWithNgrams f = toDocumentIdWithNgrams
mapDocumentIdWithNgrams :: Monad m mapDocumentIdWithNgrams :: Monad m
=> ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) ) => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
-> [Indexed NodeId a] -> [Indexed ix a]
-> m [DocumentIdWithNgrams a b] -> m [DocumentIdWithNgrams ix a b]
mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f) mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f)
-- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list. -- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
-- TODO check optimization -- TODO check optimization
mkNodeIdNgramsMap :: (Ord b, Hashable b) mkNodeIdNgramsMap :: forall ix a b. (Ord b, Hashable b, Ord ix)
=> [DocumentIdWithNgrams a b] => [DocumentIdWithNgrams ix a b]
-> HashMap.HashMap b -> HashMap.HashMap b
(Map NgramsType (Map NgramsType
(Map NodeId (TermsWeight, TermsCount)) (Map ix (TermsWeight, TermsCount))
) )
mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
where where
...@@ -116,8 +116,8 @@ mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . ...@@ -116,8 +116,8 @@ mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) .
-- same ngrams term has different ngrams types, the 'TermsCount' -- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the term appears in a -- for it (which is the number of times the term appears in a
-- document) is copied over to all its types. -- document) is copied over to all its types.
f :: DocumentIdWithNgrams a b f :: DocumentIdWithNgrams ix a b
-> HashMap.HashMap b (Map NgramsType (Map NodeId (TermsWeight, TermsCount))) -> HashMap.HashMap b (Map NgramsType (Map ix (TermsWeight, TermsCount)))
f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d
where where
nId = _index $ documentWithId d nId = _index $ documentWithId d
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment