Commit cd4899c0 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[OPTIM] Order1 similarity optimized (parallel computation)

parent f7b1528f
......@@ -11,6 +11,7 @@ module Gargantext.API.Ngrams.Types where
import Codec.Serialise (Serialise())
import Control.Category ((>>>))
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses, makePrisms, Iso', iso, from, (.~), (?=), (#), to, folded, {-withIndex, ifolded,-} view, use, (^.), (^?), (%~), (.~), (%=), at, _Just, Each(..), itraverse_, both, forOf_, (?~))
import Control.Monad.State
import Data.Aeson hiding ((.=))
......@@ -121,7 +122,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
......
......@@ -15,37 +15,34 @@ Motivation and definition of the @Conditional@ distance.
module Gargantext.Core.Methods.Distances.Conditional
where
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.Hashable (Hashable)
import Data.List (unzip)
import Data.Maybe (catMaybes)
import Gargantext.Prelude
import qualified Data.HashMap.Strict as Map
import qualified Data.Set as Set
import Data.Hashable (Hashable)
import Control.DeepSeq (NFData)
type HashMap = Map.HashMap
------------------------------------------------------------------------
-- First version as first implementation
-- - unoptimized but qualitatively verified
getMax :: (a,a) -> Maybe Double -> Maybe Double -> Maybe ((a,a), Double)
getMax (i,j) (Just d) Nothing = Just ((i,j), d)
getMax (i,j) Nothing (Just d) = Just ((j,i), d)
getMax ij (Just di) (Just dj) = if di >= dj then getMax ij (Just di) Nothing
else getMax ij Nothing (Just dj)
getMax _ _ _ = Nothing
conditional :: (Ord a, Hashable a) => HashMap (a,a) Int -> HashMap (a,a) Double
conditional m' = Map.fromList $ catMaybes results
-- - qualitatively verified
-- - parallized as main optimization
conditional :: (Ord a, Hashable a, NFData a)
=> HashMap (a,a) Int
-> HashMap (a,a) Double
conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
where
results = [ let
results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (j,j) m
in getMax (i,j) ij ji
| i <- keys
, j <- keys
, i < j
]
| i <- keys
, j <- keys
, i < j
]
-- Converting from Int to Double
m = Map.map fromIntegral m'
......@@ -54,113 +51,14 @@ conditional m' = Map.fromList $ catMaybes results
(x,y) = unzip $ Map.keys m
getMax :: (a,a)
-> Maybe Double
-> Maybe Double
-> Maybe ((a,a), Double)
getMax (i,j) (Just d) Nothing = Just ((i,j), d)
getMax (i,j) Nothing (Just d) = Just ((j,i), d)
getMax ij (Just di) (Just dj) = if di >= dj then getMax ij (Just di) Nothing
else getMax ij Nothing (Just dj)
getMax _ _ _ = Nothing
{-
import Data.List (sortOn)
import Data.Map (Map)
import Data.Matrix hiding (identity)
import Gargantext.Core.Viz.Graph.Utils
import Gargantext.Prelude
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Vector as V
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
-- x' = x / (sum Col x)
x' = proba Col m
------------------------------------------------------------------------
-- xs = (sum Col x') - x'
xs = distFromSum Col x'
-- ys = (sum Row x') - x'
ys = distFromSum Row x'
------------------------------------------------------------------------
-- | Top included or excluded
ie = opWith (+) xs ys
-- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- | Top specific or generic
sg = opWith (-) xs ys
-- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
nodes_kept :: [Int]
nodes_kept = take k' $ S.toList
$ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
$ map fst
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m'' = V.minimum
$ V.map (\cId -> V.maximum $ getCol cId m'')
(V.enumFromTo 1 (nOf Col m'') )
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t' x = case (x >= t') of
True -> x
False -> 0
------------------------------------------------------------------------
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment