Commit b88373d3 authored by Grégoire Locqueville

Some "automatic" refactoring

Reduce code duplication and simplify a few things.

The changes in this commit were made without relying on any knowledge
about what the code means or does; the changes should be understandable
even without knowing the project.
parent 8008d6ee
...@@ -108,64 +108,40 @@ partitionsToClusterNodes setlist = setlist ...@@ -108,64 +108,40 @@ partitionsToClusterNodes setlist = setlist
& fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId) -- Turn pairs into `ClusterNode`s & fmap (\(clusterId, nodeId) -> ClusterNode nodeId clusterId) -- Turn pairs into `ClusterNode`s
doSimilarityMap :: Similarity doSimilarityMap :: Similarity
-> Threshold -> Threshold
-> Strength -> Strength
-> HashMap (NgramsTerm, NgramsTerm) Int -> HashMap (NgramsTerm, NgramsTerm) Int -- cooccurrence map
-> ( Map (Int,Int) Double -> ( Map (Int,Int) Double -- weight map
, Map (Index, Index) Int , Map (Index, Index) Int -- cooccurrence map
, Map NgramsTerm Index , Map NgramsTerm Index -- ???
) )
doSimilarityMap similarityType threshold strength coocMap =
doSimilarityMap Conditional threshold strength myCooc = (distanceMap, toIndex ti myCooc', ti) (weightMap, toIndex ti coocMap', ti)
where where
myCooc' = Map.fromList $ HashMap.toList myCooc coocMap' = case similarityType of
Conditional -> Map.fromList $ HashMap.toList coocMap
(_diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y) Distributional -> diag
$ Map.fromList (diag, theMatrix) = Map.partitionWithKey (\(x, y) _ -> x == y)
$ HashMap.toList myCooc
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure Conditional m)
$ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix
links = round (let n :: Double = fromIntegral (Map.size ti) in 10 * n * (log n)^(2::Int))
distanceMap = Map.fromList
$ List.take links
$ (if strength == Weak then List.reverse else identity)
$ List.sortOn snd
$ Map.toList
$ Map.filter (> threshold)
$ similarities `seq` mat2map similarities
doSimilarityMap distriType threshold strength myCooc = (distanceMap, toIndex ti diag, ti)
where
-- TODO remove below
(diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
$ Map.fromList $ Map.fromList
$ HashMap.toList myCooc $ HashMap.toList coocMap
(ti, _it) = createIndices theMatrix (ti, _it) = createIndices theMatrix
tiSize = Map.size ti similarities = (\m -> m `seq` measure similarityType m)
similarities = (\m -> m `seq` m)
$ (\m -> m `seq` measure distriType m)
$ (\m -> m `seq` map2mat Square 0 tiSize m) $ (\m -> m `seq` map2mat Square 0 tiSize m)
$ theMatrix `seq` toIndex ti theMatrix $ theMatrix `seq` toIndex ti theMatrix
tiSize = Map.size ti
links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int)) links = let n = fromIntegral $ Map.size ti :: Double
factor = if similarityType == Conditional then 10 else 1
distanceMap = Map.fromList in round $ factor * n * log n^(2::Int)
$ List.take links filterMap = case similarityType of
$ (if strength == Weak then List.reverse else identity) Conditional -> Map.filter (> threshold)
$ List.sortOn snd Distributional -> edgesFilter . (\m -> m `seq` Map.filter (> threshold) m)
$ Map.toList weightMap = Map.fromList
$ edgesFilter $ List.take links
$ (\m -> m `seq` Map.filter (> threshold) m) $ (if strength == Weak then List.reverse else identity)
$ similarities `seq` mat2map similarities $ List.sortOn snd
$ Map.toList
$ filterMap
$ similarities `seq` mat2map similarities
---------------------------------------------------------- ----------------------------------------------------------
-- | From data to Graph -- | From data to Graph
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment