Commit d7fd1875 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT/Distances] Accelerate lib for GPU: conditional and distributional. Needs behavioral tests.

parent 46330fdd
......@@ -26,9 +26,12 @@ import Data.Text (unpack)
import Gargantext.Prelude
import Gargantext.API (startGargantext, startGargantextMock)
--------------------------------------------------------
-- Tests
import qualified Gargantext.Graph.Utils as U
import qualified Gargantext.Graph.Distances.Conditional as C
import qualified Gargantext.Graph.Distances.Distributional as D
import qualified Gargantext.Graph.Distances.Matrice as M
--------------------------------------------------------
data Mode = Dev | Mock | Prod
......@@ -48,8 +51,9 @@ instance ParseRecord (MyOptions Wrapped)
deriving instance Show (MyOptions Unwrapped)
main :: IO ()
main = putStrLn $ show $ C.conditional U.m1
--main = putStrLn $ show $ map show $ take 10 $ D.distributional U.m1
main = do
putStrLn $ show $ M.conditional $ M.myMat 10
--putStrLn $ show $ M.size' $ M.myMat 100
main' :: IO ()
main' = do
......
......@@ -37,6 +37,7 @@ library:
- Gargantext.Graph.Utils
- Gargantext.Graph.Distances.Conditional
- Gargantext.Graph.Distances.Distributional
- Gargantext.Graph.Distances.Matrice
- Gargantext.Ngrams
- Gargantext.Ngrams.Analysis
- Gargantext.Ngrams.TFICF
......@@ -61,6 +62,7 @@ library:
- Gargantext.Utils.Prefix
dependencies:
- QuickCheck
- accelerate
- aeson
- aeson-lens
- aeson-pretty
......
......@@ -34,10 +34,55 @@ import qualified Data.Vector as V
import Gargantext.Prelude
import Gargantext.Graph.Utils
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = x' -- filter (threshold m') m'
conditional m = filter (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
......@@ -88,26 +133,3 @@ conditional m = x' -- filter (threshold m') m'
False -> 0
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
......@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
Motivation and definition of the @Distributional@ distance.
-}
{-# LANGUAGE BangPatterns #-}
......@@ -64,7 +64,7 @@ mi m = matrix c r createMat
where
(c,r) = (nOf Col m, nOf Row m)
createMat (x,y) = doMi x y m
doMi x y m = if x == y then 0 else (nonNegative $ log (doMi' x y m))
doMi x y m = if x == y then 0 else (max (log (doMi' x y m)) 0 )
doMi' x y m = (getElem x y m) / ( cross x y m / total m )
......@@ -79,9 +79,6 @@ ax a i j m = dropAt j' $ axis a i' m
j' = mod r j + 1
(c,r) = (nOf Col m, nOf Row m)
nonNegative :: (Ord a, Num a) => a -> a
nonNegative x = if x > 0 then x else 0
miniMax :: (Ord a) => Matrix a -> a
miniMax m = V.minimum $ V.map (\c -> V.maximum $ getCol c m) (V.enumFromTo 1 (nOf Col m))
......
{-|
Module : Gargantext.Graph.Distances.Matrix
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Motivation and definition of the @Conditional@ distance.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Graph.Distances.Matrice
where
import Data.Array.Accelerate.Data.Bits
import Data.Array.Accelerate.Interpreter
import Data.Array.Accelerate
import Data.Array.Accelerate.Smart
import Data.Array.Accelerate.Type
import Data.Array.Accelerate.Array.Sugar (fromArr, Array, Z)
import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
matrix :: Elt c => Int -> [c] -> Matrix c
matrix n l = fromList (Z :. n :. n) l
myMat :: Int -> Matrix Double
myMat n = matrix n [1..]
-- | Two ways to get the rank (as documentation)
rank :: (Matrix Double) -> Int
rank m = arrayRank $ arrayShape m
rank' :: (Matrix Double) -> Int
rank' m = n
where
Z :. _ :. n = arrayShape m
-----------------------------------------------------------------------
-- | Conditional Distance
type Rank = Int
proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
$ fold (+) 0 mat
type Matrix' a = Acc (Matrix a)
conditional :: Matrix Double -> (Matrix Double, Matrix Double)
conditional m = (run $ ie (use m), run $ sg (use m))
where
r :: Rank
r = rank' m
xs :: Matrix' Double -> Matrix' Double
xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
ie :: Matrix' Double -> Matrix' Double
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
n :: Exp Double
n = P.fromIntegral r
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional :: Matrix Double -> Matrix Double
distributional m = run $ filter $ ri (use m)
where
n = rank m
filter m = zipWith (\a b -> max a b) m (transpose m)
--miniMax m = fold minimum $ fold maximum m
ri mat = zipWith (/) mat1 mat2
where
mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
mat2 = mkSum n mat
mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
$ zipWith (/) (crossProduct m') (total m')
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m = zipWith (*) (cross m ) (cross (transpose m))
cross mat = zipWith (-) (mkSum n mat) (mat)
......@@ -32,6 +32,7 @@ import qualified Data.List as L
import Gargantext.Prelude
------------------------------------------------------------------------
-- | Some utils to build the matrix from cooccurrence results
type Distance = Double
type Cooc = Int
......@@ -117,14 +118,13 @@ data Axis = Col | Row
--divide :: Fractional a => Matrix a -> Matrix a -> Matrix a
--divide a b = a `multStd` (div b)
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
------------------------------------------------------------------------
-- | Matrix functions
type AxisId = Int
total :: Num a => Matrix a -> a
total m = V.sum $ V.map (\c -> V.sum (getCol c m)) (V.enumFromTo 1 (nOf Col m))
nOf :: Axis -> Matrix a -> Int
nOf Row = nrows
nOf Col = ncols
......@@ -133,21 +133,6 @@ axis :: Axis -> AxisId -> Matrix a -> Vector a
axis Col = getCol
axis Row = getRow
--mapOn' :: Axis -> (a -> a) -> Matrix a -> Matrix a
--mapOn' a f m = foldl' (\m' aId -> mapOn a (aId f) m') m [1.. (nOf a m)]
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
toListsWithIndex :: Matrix a -> [((Int, Int), a)]
toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
......@@ -156,7 +141,6 @@ toListsWithIndex m = concat' $ zip [1..] $ map (\c -> zip [1..] c) $ toLists m
concat' xs = L.concat $ map (\(x, ys) -> map (\(y, a) -> ((x,y), a)) ys ) xs
-- | For tests only, to be removed
m1 :: Matrix Double
m1 = fromList 300 300 [1..]
......
......@@ -75,37 +75,37 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- TODO: to debug maybe add the filepath in error message
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
parse format path = do
files <- case takeExtension path of
".zip" -> openZip path
_ -> pure <$> DB.readFile path
(as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
pure (as, map toText $ concat bs)
where
-- TODO : decode with bayesian inference on encodings
toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
-- | withParser:
-- According the format of the text, choosing the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser
--withParser DOC = docParser
--withParser ODT = odtParser
--withParser XML = xmlParser
--withParser _ = error "[ERROR] Parser not implemented yet"
runParser :: FileFormat -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
openZip fp = do
path <- resolveFile' fp
entries <- withArchive path (DM.keys <$> getEntries)
bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
pure bs
--parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
--parse format path = do
-- files <- case takeExtension path of
-- ".zip" -> openZip path
-- _ -> pure <$> DB.readFile path
-- (as, bs) <- partitionEithers <$> mapConcurrently (runParser format) files
-- pure (as, map toText $ concat bs)
-- where
-- -- TODO : decode with bayesian inference on encodings
-- toText = map (\(a,b) -> (decodeUtf8 a, decodeUtf8 b))
--
--
---- | withParser:
---- According the format of the text, choosing the right parser.
---- TODO withParser :: FileFormat -> Parser [Document]
--withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
--withParser WOS = wosParser
----withParser DOC = docParser
----withParser ODT = odtParser
----withParser XML = xmlParser
----withParser _ = error "[ERROR] Parser not implemented yet"
--
--runParser :: FileFormat -> DB.ByteString
-- -> IO (Either String [[(DB.ByteString, DB.ByteString)]])
--runParser format text = pure $ parseOnly (withParser format) text
--
--openZip :: FilePath -> IO [DB.ByteString]
--openZip fp = do
-- path <- resolveFile' fp
-- entries <- withArchive path (DM.keys <$> getEntries)
-- bs <- mapConcurrently (\s -> withArchive path (getEntry s)) entries
-- pure bs
......@@ -31,19 +31,19 @@ import Data.Maybe (isJust, fromJust, maybe)
import Protolude ( Bool(True, False), Int, Double, Integer
, Fractional, Num, Maybe(Just,Nothing)
, Floating, Char, IO
, pure, (<$>), panic
, pure, (<$>), (<&>), panic
, head, flip
, Ord, Integral, Foldable, RealFrac, Monad, filter
, reverse, map, zip, drop, take, zipWith
, sum, fromIntegral, length, fmap, foldl, foldl'
, takeWhile, sqrt, undefined, identity
, abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (**), (^), (<), (>), log
, (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not
, fst, snd, toS
, elem, die, mod, div
, curry, uncurry
, curry, uncurry, repeat
)
-- TODO import functions optimized in Utils.Count
......@@ -235,3 +235,4 @@ zipSnd f xs = zip xs (f xs)
unMaybe :: [Maybe a] -> [a]
unMaybe = map fromJust . L.filter isJust
......@@ -2,6 +2,7 @@ flags: {}
extra-package-dbs: []
packages:
- .
- '/home/alexandre/local/logiciels/haskell/accelerate/accelerate'
allow-newer: true
extra-deps:
......@@ -15,18 +16,20 @@ extra-deps:
- aeson-lens-0.5.0.0
- duckling-0.1.3.0
- extra-1.5.3
- file-embed-lzma-0
- haskell-src-exts-1.18.2
- http-types-0.12.1
- protolude-0.2
- servant-0.12.1
- servant-auth-0.3.0.1
- servant-client-0.12.0.1
- servant-client-core-0.12
- servant-docs-0.11.1
- servant-0.13.0.1
- servant-auth-0.3.1.0
- servant-client-0.13.0.1
- servant-client-core-0.13.0.1
- servant-docs-0.11.2
- servant-multipart-0.11.1
- servant-server-0.12
- servant-swagger-ui-0.2.3.2.2.8
- servant-server-0.13.0.1
- servant-swagger-ui-0.3.0.3.13.2
- servant-swagger-ui-core-0.3
- stemmer-0.5.2
- text-1.2.3.0
- text-show-3.6.2
resolver: lts-10.6
resolver: lts-11.6
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment