Commit d7fd1875 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT/Distances] Accelerate lib for GPU: conditional and distributional. Needs behavioral tests.

parent 46330fdd
...@@ -26,9 +26,12 @@ import Data.Text (unpack) ...@@ -26,9 +26,12 @@ import Data.Text (unpack)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API (startGargantext, startGargantextMock) import Gargantext.API (startGargantext, startGargantextMock)
--------------------------------------------------------
-- Tests
import qualified Gargantext.Graph.Utils as U import qualified Gargantext.Graph.Utils as U
import qualified Gargantext.Graph.Distances.Conditional as C import qualified Gargantext.Graph.Distances.Conditional as C
import qualified Gargantext.Graph.Distances.Distributional as D import qualified Gargantext.Graph.Distances.Distributional as D
import qualified Gargantext.Graph.Distances.Matrice as M
-------------------------------------------------------- --------------------------------------------------------
data Mode = Dev | Mock | Prod data Mode = Dev | Mock | Prod
...@@ -48,8 +51,9 @@ instance ParseRecord (MyOptions Wrapped) ...@@ -48,8 +51,9 @@ instance ParseRecord (MyOptions Wrapped)
deriving instance Show (MyOptions Unwrapped) deriving instance Show (MyOptions Unwrapped)
main :: IO () main :: IO ()
main = putStrLn $ show $ C.conditional U.m1 main = do
--main = putStrLn $ show $ map show $ take 10 $ D.distributional U.m1 putStrLn $ show $ M.conditional $ M.myMat 10
--putStrLn $ show $ M.size' $ M.myMat 100
main' :: IO () main' :: IO ()
main' = do main' = do
......
...@@ -37,6 +37,7 @@ library: ...@@ -37,6 +37,7 @@ library:
- Gargantext.Graph.Utils - Gargantext.Graph.Utils
- Gargantext.Graph.Distances.Conditional - Gargantext.Graph.Distances.Conditional
- Gargantext.Graph.Distances.Distributional - Gargantext.Graph.Distances.Distributional
- Gargantext.Graph.Distances.Matrice
- Gargantext.Ngrams - Gargantext.Ngrams
- Gargantext.Ngrams.Analysis - Gargantext.Ngrams.Analysis
- Gargantext.Ngrams.TFICF - Gargantext.Ngrams.TFICF
...@@ -61,6 +62,7 @@ library: ...@@ -61,6 +62,7 @@ library:
- Gargantext.Utils.Prefix - Gargantext.Utils.Prefix
dependencies: dependencies:
- QuickCheck - QuickCheck
- accelerate
- aeson - aeson
- aeson-lens - aeson-lens
- aeson-pretty - aeson-pretty
......
...@@ -34,10 +34,55 @@ import qualified Data.Vector as V ...@@ -34,10 +34,55 @@ import qualified Data.Vector as V
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Graph.Utils import Gargantext.Graph.Utils
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
------------------------------------------------------- -------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = x' -- filter (threshold m') m' conditional m = filter (threshold m') m'
where where
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Main Operations -- | Main Operations
...@@ -88,26 +133,3 @@ conditional m = x' -- filter (threshold m') m' ...@@ -88,26 +133,3 @@ conditional m = x' -- filter (threshold m') m'
False -> 0 False -> 0
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org ...@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Motivation and definition of the @Conditional@ distance. Motivation and definition of the @Distributional@ distance.
-} -}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
...@@ -64,7 +64,7 @@ mi m = matrix c r createMat ...@@ -64,7 +64,7 @@ mi m = matrix c r createMat
where where
(c,r) = (nOf Col m, nOf Row m) (c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m createMat (x,y) = doMi x y m
doMi x y m = if x == y then 0 else (nonNegative $ log (doMi' x y m)) doMi x y m = if x == y then 0 else (max (log (doMi' x y m)) 0 )
doMi' x y m = (getElem x y m) / ( cross x y m / total m ) doMi' x y m = (getElem x y m) / ( cross x y m / total m )
...@@ -79,9 +79,6 @@ ax a i j m = dropAt j' $ axis a i' m ...@@ -79,9 +79,6 @@ ax a i j m = dropAt j' $ axis a i' m
j' = mod r j + 1 j' = mod r j + 1
(c,r) = (nOf Col m, nOf Row m) (c,r) = (nOf Col m, nOf Row m)
nonNegative :: (Ord a, Num a) => a -> a
nonNegative x = if x > 0 then x else 0
miniMax :: (Ord a) => Matrix a -> a miniMax :: (Ord a) => Matrix a -> a
miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m)) miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m))
......
{-|
Module : Gargantext.Graph.Distances.Matrix
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Graph.Distances.Matrice
where
import Data.Array.Accelerate.Data.Bits
import Data.Array.Accelerate.Interpreter
import Data.Array.Accelerate
import Data.Array.Accelerate.Smart
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
matrix :: Elt c => Int -> [c] -> Matrix c
matrix n l = fromList (Z :. n :. n) l
myMat :: Int -> Matrix Double
myMat n = matrix n [1..]
-- | Two ways to get the rank (as documentation)
rank :: (Matrix Double) -> Int
rank m = arrayRank $ arrayShape m
rank' :: (Matrix Double) -> Int
rank' m = n
where
Z :. _ :. n = arrayShape m
-----------------------------------------------------------------------
-- | Conditional Distance
type Rank = Int
proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
$ fold (+) 0 mat
type Matrix' a = Acc (Matrix a)
conditional :: Matrix Double -> (Matrix Double, Matrix Double)
conditional m = (run $ ie (use m), run $ sg (use m))
where
r :: Rank
r = rank' m
xs :: Matrix' Double -> Matrix' Double
xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
ie :: Matrix' Double -> Matrix' Double
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
n :: Exp Double
n = P.fromIntegral r
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional :: Matrix Double -> Matrix Double
distributional m = run $ filter $ ri (use m)
where
n = rank m
filter m = zipWith (\a b -> max a b) m (transpose m)
--miniMax m = fold minimum $ fold maximum m
ri mat = zipWith (/) mat1 mat2
where
mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
mat2 = mkSum n mat
mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
$ zipWith (/) (crossProduct m') (total m')
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
cross mat = zipWith (-) (mkSum n mat) (mat)
...@@ -32,6 +32,7 @@ import qualified Data.List as L ...@@ -32,6 +32,7 @@ import qualified Data.List as L
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Some utils to build the matrix from cooccurrence results
type Distance = Double type Distance = Double
type Cooc = Int type Cooc = Int
...@@ -117,14 +118,13 @@ data Axis = Col | Row ...@@ -117,14 +118,13 @@ data Axis = Col | Row
--divide :: Fractional a => Matrix a -> Matrix a -> Matrix a --divide :: Fractional a => Matrix a -> Matrix a -> Matrix a
--divide a b = a `multStd` (div b) --divide a b = a `multStd` (div b)
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Matrix functions -- | Matrix functions
type AxisId = Int type AxisId = Int
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
nOf :: Axis -> Matrix a -> Int nOf :: Axis -> Matrix a -> Int
nOf Row = nrows nOf Row = nrows
nOf Col = ncols nOf Col = ncols
...@@ -133,21 +133,6 @@ axis :: Axis -> AxisId -> Matrix a -> Vector a ...@@ -133,21 +133,6 @@ axis :: Axis -> AxisId -> Matrix a -> Vector a
axis Col = getCol axis Col = getCol
axis Row = getRow axis Row = getRow
--mapOn' :: Axis -> (a -> a) -> Matrix a -> Matrix a
--mapOn' a f m = foldl' (\m' aId -> mapOn a (aId f) m') m [1.. (nOf a m)]
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
toListsWithIndex :: Matrix a -> [((Int, Int), a)] toListsWithIndex :: Matrix a -> [((Int, Int), a)]
toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
...@@ -156,7 +141,6 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m ...@@ -156,7 +141,6 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
-- | For tests only, to be removed -- | For tests only, to be removed
m1 :: Matrix Double m1 :: Matrix Double
m1 = fromList 300 300 [1..] m1 = fromList 300 300 [1..]
......
...@@ -75,37 +75,37 @@ data FileFormat = WOS -- Implemented (ISI Format) ...@@ -75,37 +75,37 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message -- TODO: to debug maybe add the filepath in error message
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]]) --parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
parse format path = do --parse format path = do
files <- case takeExtension path of -- files <- case takeExtension path of
".zip" -> openZip path -- ".zip" -> openZip path
_ -> pure <$> DB.readFile path -- _ -> pure <$> DB.readFile path
(as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files -- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
pure (as, map toText $ concat bs) -- pure (as, map toText $ concat bs)
where -- where
-- TODO : decode with bayesian inference on encodings -- -- TODO : decode with bayesian inference on encodings
toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b)) -- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
-- | withParser: ---- | withParser:
-- According the format of the text, choosing the right parser. ---- According the format of the text, choosing the right parser.
-- TODO withParser :: FileFormat -> Parser [Document] ---- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]] --withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser --withParser WOS = wosParser
--withParser DOC = docParser ----withParser DOC = docParser
--withParser ODT = odtParser ----withParser ODT = odtParser
--withParser XML = xmlParser ----withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet" ----withParser _ = error "[ERROR] Parser not implemented yet"
--
runParser :: FileFormat -> DB.ByteString --runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]]) -- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ parseOnly (withParser format) text --runParser format text = pure $ parseOnly (withParser format) text
--
openZip :: FilePath -> IO [DB.ByteString] --openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do --openZip fp = do
path <- resolveFile' fp -- path <- resolveFile' fp
entries <- withArchive path (DM.keys <$> getEntries) -- entries <- withArchive path (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries -- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
pure bs -- pure bs
...@@ -31,19 +31,19 @@ import Data.Maybe (isJust, fromJust, maybe) ...@@ -31,19 +31,19 @@ import Data.Maybe (isJust, fromJust, maybe)
import Protolude ( Bool(True, False), Int, Double, Integer import Protolude ( Bool(True, False), Int, Double, Integer
, Fractional, Num, Maybe(Just,Nothing) , Fractional, Num, Maybe(Just,Nothing)
, Floating, Char, IO , Floating, Char, IO
, pure, (<$>), panic , pure, (<$>), (<&>), panic
, head, flip , head, flip
, Ord, Integral, Foldable, RealFrac, Monad, filter , Ord, Integral, Foldable, RealFrac, Monad, filter
, reverse, map, zip, drop, take, zipWith , reverse, map, zip, drop, take, zipWith
, sum, fromIntegral, length, fmap, foldl, foldl' , sum, fromIntegral, length, fmap, foldl, foldl'
, takeWhile, sqrt, undefined, identity , takeWhile, sqrt, undefined, identity
, abs, min, max, maximum, minimum, return, snd, truncate , abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (**), (^), (<), (>), log , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=) , Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not , (&&), (||), not
, fst, snd, toS , fst, snd, toS
, elem, die, mod, div , elem, die, mod, div
, curry, uncurry , curry, uncurry, repeat
) )
-- TODO import functions optimized in Utils.Count -- TODO import functions optimized in Utils.Count
...@@ -235,3 +235,4 @@ zipSnd f xs = zip xs (f xs) ...@@ -235,3 +235,4 @@ zipSnd f xs = zip xs (f xs)
unMaybe :: [Maybe a] -> [a] unMaybe :: [Maybe a] -> [a]
unMaybe = map fromJust . L.filter isJust unMaybe = map fromJust . L.filter isJust
...@@ -2,6 +2,7 @@ flags: {} ...@@ -2,6 +2,7 @@ flags: {}
extra-package-dbs: [] extra-package-dbs: []
packages: packages:
- . - .
- '/home/alexandre/local/logiciels/haskell/accelerate/accelerate'
allow-newer: true allow-newer: true
extra-deps: extra-deps:
...@@ -15,18 +16,20 @@ extra-deps: ...@@ -15,18 +16,20 @@ extra-deps:
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
- duckling-0.1.3.0 - duckling-0.1.3.0
- extra-1.5.3 - extra-1.5.3
- file-embed-lzma-0
- haskell-src-exts-1.18.2 - haskell-src-exts-1.18.2
- http-types-0.12.1 - http-types-0.12.1
- protolude-0.2 - protolude-0.2
- servant-0.12.1 - servant-0.13.0.1
- servant-auth-0.3.0.1 - servant-auth-0.3.1.0
- servant-client-0.12.0.1 - servant-client-0.13.0.1
- servant-client-core-0.12 - servant-client-core-0.13.0.1
- servant-docs-0.11.1 - servant-docs-0.11.2
- servant-multipart-0.11.1 - servant-multipart-0.11.1
- servant-server-0.12 - servant-server-0.13.0.1
- servant-swagger-ui-0.2.3.2.2.8 - servant-swagger-ui-0.3.0.3.13.2
- servant-swagger-ui-core-0.3
- stemmer-0.5.2 - stemmer-0.5.2
- text-1.2.3.0 - text-1.2.3.0
- text-show-3.6.2 - text-show-3.6.2
resolver: lts-10.6 resolver: lts-11.6
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment