Commit d5b2f371 authored by Alp Mestanogullari's avatar Alp Mestanogullari Committed by Alexandre Delanoë

improve performance of logDistributional, upgrade to accelerate 1.3 + lots of debug output

parent b77bb393
......@@ -128,6 +128,7 @@ library:
- Unique
- accelerate
- accelerate-arithmetic
- accelerate-llvm-native
- accelerate-utility
- aeson
- aeson-lens
......
......@@ -110,8 +110,9 @@ updateNode uId nId (UpdateNodeParamsGraph metric method) logStatus = do
, _scst_remaining = Just 1
, _scst_events = Just []
}
printDebug "Computing graph: " method
_ <- recomputeGraph uId nId method (Just metric) True
printDebug "Graph computed: " method
pure JobLog { _scst_succeeded = Just 2
, _scst_failed = Just 0
......
......@@ -14,14 +14,13 @@ Portability : POSIX
module Gargantext.Core.Methods.Distances
where
import Debug.Trace (trace)
import Data.Aeson
import Data.Array.Accelerate (Matrix)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Distances.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Distances.Accelerate.Distributional (logDistributional)
import Gargantext.Prelude (Ord, Eq, Int, Double, Show, ($), show)
import Gargantext.Prelude (Ord, Eq, Int, Double, Show)
import Prelude (Enum, Bounded, minBound, maxBound)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
......@@ -32,7 +31,7 @@ data Distance = Conditional | Distributional
measure :: Distance -> Matrix Int -> Matrix Double
measure Conditional x = measureConditional x
measure Distributional x = trace (show y) $ y
measure Distributional x = y
where
y = logDistributional x
......
......@@ -46,10 +46,16 @@ module Gargantext.Core.Methods.Distances.Accelerate.Distributional
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import Data.Array.Accelerate as A
import Data.Array.Accelerate.Interpreter (run)
-- import Data.Array.Accelerate.Interpreter (run)
import Data.Array.Accelerate.LLVM.Native (run) -- TODO: try runQ?
import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import qualified Gargantext.Prelude as P
import Debug.Trace
import Prelude (show, mappend{- , String, (<>), fromIntegral, flip -})
import qualified Prelude
-- | `distributional m` returns the distributional distance between terms each
-- pair of terms as a matrix. The argument m is the matrix $[n_{ij}]_{i,j}$
-- where $n_{ij}$ is the coocccurrence between term $i$ and term $j$.
......@@ -84,10 +90,10 @@ import qualified Gargantext.Prelude as P
-- 8.333333333333333e-2, 4.6875e-2, 1.0, 0.25,
-- 0.3333333333333333, 5.7692307692307696e-2, 1.0, 1.0]
--
distributional :: Matrix Int -> Matrix Double
distributional m' = run result
distributional :: Matrix Int -> Acc (Matrix Double)
distributional m' = result
where
m = map fromIntegral $ use m'
m = map A.fromIntegral $ use m'
n = dim m'
diag_m = diag m
......@@ -116,7 +122,7 @@ distributional m' = run result
result = termDivNan z_1 z_2
logDistributional :: Matrix Int -> Matrix Double
logDistributional m = run
logDistributional m = trace ("logDistributional, dim=" `mappend` show n) . run
$ diagNull n
$ matMiniMax
$ logDistributional' n m
......@@ -124,11 +130,11 @@ logDistributional m = run
n = dim m
logDistributional' :: Int -> Matrix Int -> Acc (Matrix Double)
logDistributional' n m' = result
logDistributional' n m' = trace ("logDistributional'") result
where
-- From Matrix Int to Matrix Double, i.e :
-- m :: Matrix Int -> Matrix Double
m = map fromIntegral $ use m'
m = map A.fromIntegral $ use m'
-- Scalar. Sum of all elements of m.
to = the $ sum (flatten m)
......@@ -152,25 +158,39 @@ logDistributional' n m' = result
-- m_{i,j} = log(to * n_{i,j} / s_{i,j}) otherwise.
mi = (.*) (matrixEye n)
(map (lift1 (\x -> cond (x == 0) 0 (log (x * to)))) ((./) m ss))
-- mi_nnz :: Int
-- mi_nnz = flip indexArray Z . run $
-- foldAll (+) 0 $ map (\a -> ifThenElse (abs a < 10^(-6 :: Exp Int)) 0 1) mi
-- mi_total = n*n
-- reportMat :: String -> Int -> Int -> String
-- reportMat name nnz tot = name <> ": " <> show nnz <> "nnz / " <> show tot <>
-- " | " <> show pc <> "%"
-- where pc = 100 * Prelude.fromIntegral nnz / Prelude.fromIntegral tot :: Double
-- Tensor nxnxn. Matrix mi replicated along the 2nd axis.
w_1 = replicate (constant (Z :. All :. n :. All)) mi
-- w_1 = trace (reportMat "mi" mi_nnz mi_total) $ replicate (constant (Z :. All :. n :. All)) mi
-- w1_nnz :: Int
-- w1_nnz = flip indexArray Z . run $
-- foldAll (+) 0 $ map (\a -> ifThenElse (abs a < 10^(-6 :: Exp Int)) 0 1) w_1
-- w1_total = n*n*n
-- Tensor nxnxn. Matrix mi replicated along the 1st axis.
w_2 = replicate (constant (Z :. n :. All :. All)) mi
-- w_2 = trace (reportMat "w1" w1_nnz w1_total) $ replicate (constant (Z :. n :. All :. All)) mi
-- Tensor nxnxn.
w' = zipWith min w_1 w_2
-- w' = trace "w'" $ zipWith min w_1 w_2
-- A predicate that is true when the input (i, j, k) satisfy
-- k /= i AND k /= j
k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- k_diff_i_and_j = lift1 (\(Z :. i :. j :. k) -> ((&&) ((/=) k i) ((/=) k j)))
-- Matrix nxn.
sumMin = sum (condOrDefault k_diff_i_and_j 0 w')
sumMin = trace "sumMin" $ sumMin_go n mi -- sum (condOrDefault k_diff_i_and_j 0 w')
-- Matrix nxn. All columns are the same.
sumM = sum (condOrDefault k_diff_i_and_j 0 w_1)
sumM = trace "sumM" $ sumM_go n mi -- trace "sumM" $ sum (condOrDefault k_diff_i_and_j 0 w_1)
result = termDivNan sumMin sumM
......@@ -202,7 +222,7 @@ distributional'' m = -- run {- $ matMiniMax -}
$ filterWith 0 100
$ filter' 0
$ s_mi
$ map fromIntegral
$ map A.fromIntegral
{- from Int to Double -}
$ use m
{- push matrix in Accelerate type -}
......@@ -246,3 +266,70 @@ distriTest :: Int -> Matrix Double
distriTest n = logDistributional (theMatrixInt n)
-- * sparse utils
-- compact repr of "extend along an axis" op?
-- general sparse repr ?
type Extended sh = sh :. Int
data Ext where
Along1 :: Int -> Ext
Along2 :: Int -> Ext
along1 :: Int -> Ext
along1 = Along1
along2 :: Int -> Ext
along2 = Along2
type Delayed sh a = Exp sh -> Exp a
data ExtArr sh a = ExtArr
{ extSh :: Extended sh
, extFun :: Delayed (Extended sh) a
}
{-
w_1_{i, j, k} = mi_{i, k}
w_2_{i, j, k} = mi_{j, k}
w'_{i, j, k} = min w_1_{i, j, k} w_2_{i, j, k}
= min mi_{i, k} mi_{j, k}
w"_{i, j, k} = 0 if i = k or j = k
min mi_{i, k} mi_{j, k} otherwise
w_1'_{i, j, k} = 0 if i = k or j = k
mi_{i, k} otherwise
sumMin_{i, j} = sum_k of w"_{i, j, k}
= sum_k (k /= i && k /= j) of min mi_{i, k} mi_{j, k}
sumM_{i, j} = sum_k of w_1'_{i, j, k}
= sum_k (k /= i && k /= j) of mi_{i, k}
-}
sumM_go :: (Elt a, Num a) => Int -> Acc (Array DIM2 a) -> Acc (Array DIM2 a)
sumM_go n mi = generate (lift (Z :. n :. n)) $ \coord ->
let (Z :. i :. j) = unlift coord in
Prelude.sum
[ cond (constant k /= i && constant k /= j)
(mi ! lift (constant Z :. i :. constant k))
0
| k <- [0 .. n-1]
]
sumMin_go :: (Elt a, Num a, Ord a) => Int -> Acc (Array DIM2 a) -> Acc (Array DIM2 a)
sumMin_go n mi = generate (constant (Z :. n :. n)) $ \coord ->
let (Z :. i :. j) = unlift coord in
Prelude.sum
[ cond (constant k /= i && constant k /= j)
(min
(mi ! lift (constant Z :. i :. constant k))
(mi ! lift (constant Z :. j :. constant k))
)
0
| k <- [0 .. n-1]
]
......@@ -36,6 +36,8 @@ import Data.Array.Accelerate
import Data.Array.Accelerate.Interpreter (run)
import qualified Gargantext.Prelude as P
import Debug.Trace (trace)
-- | Matrix cell by cell multiplication
(.*) :: ( Shape ix
, Slice ix
......@@ -70,7 +72,7 @@ termDivNan :: ( Shape ix
=> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
-> Acc (Array ((ix :. Int) :. Int) a)
termDivNan = zipWith (\i j -> cond ((==) j 0) 0 ((/) i j))
termDivNan = trace "termDivNan" $ zipWith (\i j -> cond ((==) j 0) 0 ((/) i j))
(.-) :: ( Shape ix
, Slice ix
......@@ -108,7 +110,7 @@ matrixIdentity n' =
ones = fill (index1 n) 1
n = constant n'
in
permute const zeros (\(unindex1 -> i) -> index2 i i) ones
permute const zeros (\(unindex1 -> i) -> Just_ $ index2 i i) ones
matrixEye :: Num a => Dim -> Acc (Matrix a)
......@@ -117,11 +119,11 @@ matrixEye n' =
zeros = fill (index1 n) 0
n = constant n'
in
permute const ones (\(unindex1 -> i) -> index2 i i) zeros
permute const ones (\(unindex1 -> i) -> Just_ $ index2 i i) zeros
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = zipWith (*) m (matrixEye n)
diagNull n m = trace ("diagNull") $ zipWith (*) m (matrixEye n)
-- Returns an N-dimensional array with the values of x for the indices where
......@@ -132,7 +134,7 @@ condOrDefault
condOrDefault theCond def x = permute const zeros filterInd x
where
zeros = fill (shape x) (def)
filterInd ix = (cond (theCond ix)) ix ignore
filterInd ix = (cond (theCond ix)) (Just_ ix) Nothing_
-----------------------------------------------------------------------
_runExp :: Elt e => Exp e -> e
......@@ -161,7 +163,7 @@ matrix n l = fromList (Z :. n :. n) l
-- >>> rank (matrix 3 ([1..] :: [Int]))
-- 2
rank :: (Matrix a) -> Int
rank m = arrayRank $ arrayShape m
rank m = arrayRank m
-----------------------------------------------------------------------
-- | Dimension of a square Matrix
......@@ -240,7 +242,7 @@ divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All))
matMiniMax :: (Elt a, Ord a, P.Num a)
=> Acc (Matrix a)
-> Acc (Matrix a)
matMiniMax m = filterWith' miniMax' (constant 0) m
matMiniMax m = trace "matMiniMax" $ filterWith' miniMax' (constant 0) m
where
miniMax' = the $ maximum $ minimum m
......@@ -276,7 +278,7 @@ nullOf n' dir =
zeros = fill (index2 n n) 0
n = constant n'
in
permute const ones ( lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
permute const ones ( Just_ . lift1 ( \(Z :. (i :: Exp Int) :. (_j:: Exp Int))
-> case dir of
MatCol m -> (Z :. i :. m)
MatRow m -> (Z :. m :. i)
......
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
......@@ -125,6 +126,7 @@ recomputeGraph :: FlowCmdM env err m
-> Bool
-> m Graph
recomputeGraph _uId nId method maybeDistance force = do
printDebug "recomputeGraph begins" (nId, method)
nodeGraph <- getNodeWith nId (Proxy :: Proxy HyperdataGraph)
let
graph = nodeGraph ^. node_hyperdata . hyperdataGraph
......@@ -140,15 +142,22 @@ recomputeGraph _uId nId method maybeDistance force = do
mcId <- getClosestParentIdByType nId NodeCorpus
let cId = maybe (panic "[G.V.G.API] Node has no parent") identity mcId
printDebug "recomputeGraph corpus" cId
listId <- defaultList cId
printDebug "recomputeGraph list" listId
repo <- getRepo [listId]
let v = repo ^. unNodeStory . at listId . _Just . a_version
printDebug "recomputeGraph got repo, version: " v
let computeG mt = do
printDebug "about to run computeGraph" ()
g <- computeGraph cId method similarity NgramsTerms repo
seq g $ printDebug "graph computed" ()
let g' = set graph_metadata mt g
_ <- updateHyperdata nId (HyperdataGraph (Just g') camera)
seq g' $ printDebug "computed graph with new metadata" ()
nentries <- updateHyperdata nId (HyperdataGraph (Just g') camera)
printDebug "graph hyperdata updated" ("entries" :: [Char], nentries)
pure g'
case graph of
......@@ -171,18 +180,22 @@ computeGraph :: FlowCmdM env err m
-> NodeListStory
-> m Graph
computeGraph cId method d nt repo = do
printDebug "computeGraph" (cId, method, nt)
lId <- defaultList cId
printDebug "computeGraph got list id: " lId
lIds <- selectNodesWithUsername NodeList userMaster
printDebug "computeGraph got nodes with username: " userMaster
let ngs = filterListWithRoot [MapTerm]
$ mapTermListRoot [lId] nt repo
myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
!myCooc <- HashMap.filter (>1) -- Removing the hapax (ngrams with 1 cooc)
<$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
printDebug "computeGraph got coocs" (HashMap.size myCooc)
graph <- liftBase $ cooc2graphWith method d 0 myCooc
printDebug "computeGraph got graph" ()
--listNgrams <- getListNgrams [lId] nt
--let graph' = mergeGraphNgrams graph (Just listNgrams)
......
......@@ -14,6 +14,8 @@ Portability : POSIX
module Gargantext.Core.Viz.Graph.Tools
where
import Debug.Trace
import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
......@@ -105,8 +107,8 @@ cooc2graphWith' :: ToComId a
-> HashMap (NgramsTerm, NgramsTerm) Int
-> IO Graph
cooc2graphWith' doPartitions distance threshold myCooc = do
let
(distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
let (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
distanceMap `seq` trace "distanceMap OK" diag `seq` trace "diag OK" ti `seq` printDebug "ti done" ()
--{- -- Debug
-- saveAsFileDebug "/tmp/distanceMap" distanceMap
......@@ -120,7 +122,7 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
, "Maybe you should add more Map Terms in your list"
, "Tutorial: link todo"
]
partitions `seq` printDebug "partitions done" ()
let
nodesApprox :: Int
nodesApprox = n'
......@@ -129,7 +131,8 @@ cooc2graphWith' doPartitions distance threshold myCooc = do
n' = Set.size $ Set.fromList $ as <> bs
bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
confluence' = confluence (Map.keys bridgeness') 3 True False
seq bridgeness' $ printDebug "bridgeness OK" ()
seq confluence' $ printDebug "confluence OK" ()
pure $ data2graph ti diag bridgeness' confluence' partitions
......@@ -150,20 +153,21 @@ doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, t
(ti, _it) = createIndices theMatrix
tiSize = Map.size ti
similarities = measure Distributional
$ map2mat Square 0 tiSize
$ toIndex ti theMatrix
similarities = (\m -> m `seq` trace "measure done" m)
$ (\m -> m `seq` trace "map2mat done" (measure Distributional m))
$ (\m -> m `seq` trace "toIndex done" (map2mat Square 0 tiSize m))
$ theMatrix `seq` trace "theMatrix done" (toIndex ti theMatrix)
links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
distanceMap = Map.fromList
distanceMap = Map.fromList . trace "fromList" identity
$ List.take links
$ List.reverse
$ List.sortOn snd
$ Map.toList
$ edgesFilter
$ Map.filter (> threshold)
$ mat2map similarities
$ (\m -> m `seq` trace "map2map done" (Map.filter (> threshold) m))
$ similarities `seq` mat2map (trace "similarities done" similarities)
doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
where
......
......@@ -25,16 +25,20 @@ import Gargantext.Database.Prelude (Cmd, mkCmd, JSONB)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Error
import Debug.Trace (trace)
updateHyperdata :: ToJSON a => NodeId -> a -> Cmd err Int64
updateHyperdata i h = mkCmd $ \c -> runUpdate_ c (updateHyperdataQuery i h)
updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
runUpdate_ c (updateHyperdataQuery i h) >>= \res ->
putStrLn "after runUpdate_" >> return res
updateHyperdataQuery :: ToJSON a => NodeId -> a -> Update Int64
updateHyperdataQuery i h = Update
updateHyperdataQuery i h = seq h' $ trace "updateHyperdataQuery: encoded JSON" $ Update
{ uTable = nodeTable
, uUpdateWith = updateEasy (\ (Node _ni _nh _nt _nu _np _nn _nd _h)
-> Node _ni _nh _nt _nu _np _nn _nd h'
-> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
)
, uWhere = (\row -> _node_id row .== pgNodeId i )
, uWhere = (\row -> trace "uWhere" $ _node_id row .== pgNodeId i )
, uReturning = rCount
}
where h' = (sqlJSONB $ cs $ encode $ h)
......
resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/28.yaml
flags: {}
flags:
accelerate:
debug: true
extra-package-dbs: []
skip-ghc-check: true
packages:
- .
#- 'deps/gargantext-graph'
#- 'deps/haskell-opaleye'
......@@ -34,7 +35,7 @@ extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 08096a4913572cf22762fa77613340207ec6d9fd
- git: https://gitlab.iscpif.fr/gargantext/gargantext-graph.git
commit: f68f9e78ff4302f53d0855190574c2d818a00b4d
commit: 13131f5173e2e2ab35b968e53f0feaeee13ad8ac
# Data Mining Libs
- git: https://github.com/delanoe/data-time-segment.git
commit: 10a416b9f6c443866b36479c3441ebb3bcdeb7ef
......@@ -102,12 +103,17 @@ extra-deps:
commit: 76b795c1eaca37f43418d07da9fbdf5f4e7d8f5c
# Accelerate Linear Algebra and specific instances
# (UndecidableInstances for newer GHC version)
- git: https://gitlab.iscpif.fr/anoe/accelerate.git
commit: f5c0e0071ec7b6532f9a9cd3eb33d14f340fbcc9
- git: https://gitlab.iscpif.fr/anoe/accelerate-utility.git
commit: 83ada76e78ac10d9559af8ed6bd4064ec81308e4
- accelerate-arithmetic-1.0.0.1@sha256:555639232aa5cad411e89247b27871d09352b987a754230a288c690b6de6d888,2096
- git: https://github.com/alpmestan/accelerate.git
commit: 199a1f6594406229d3c5f402443b09d62f92e640
- git: https://github.com/alpmestan/accelerate-arithmetic.git
commit: a110807651036ca2228a76507ee35bbf7aedf87a
- git: https://gitlab.iscpif.fr/amestanogullari/accelerate-utility.git
commit: a3875fe652d3bb5acb522674c22c6c814c1b4ad0
- git: https://github.com/alpmestan/accelerate-llvm.git
commit: 14629a850bb10fd1401e0ac1998df52c86e5c603
subdirs:
- accelerate-llvm/
- accelerate-llvm-native/
- git: https://github.com/rspeer/wikiparsec.git
commit: 9637a82344bb70f7fa8f02e75db3c081ccd434ce
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment