Commit 06ca7c6f authored by Grégoire Locqueville's avatar Grégoire Locqueville

Added a newtype for cooccurence matrices, with an arbitrary instance

parent 0ab82ad5
Pipeline #7495 passed with stages
in 47 minutes and 51 seconds
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
module Test.Core.LinearAlgebra where module Test.Core.LinearAlgebra where
import Data.Array.Accelerate hiding (Ord, Eq, map, (<=)) import Control.Monad (foldM, replicateM)
import Data.Array.Accelerate hiding (Ord, Eq, map, (&&), (<=), (<), replicate)
import Data.Array.Accelerate.Interpreter qualified as Naive import Data.Array.Accelerate.Interpreter qualified as Naive
import Data.Array.Accelerate qualified as A import Data.Array.Accelerate qualified as A
import Data.Functor ((<&>))
import Data.Massiv.Array qualified as Massiv import Data.Massiv.Array qualified as Massiv
import Data.Proxy import Data.Proxy
import Data.Scientific import Data.Scientific
...@@ -35,7 +36,53 @@ instance (Elt a, Show a, Prelude.Num a, Ord a, Arbitrary a) => Arbitrary (Square ...@@ -35,7 +36,53 @@ instance (Elt a, Show a, Prelude.Num a, Ord a, Arbitrary a) => Arbitrary (Square
x <- choose (1,30) x <- choose (1,30)
let sh = Z :. x :. x let sh = Z :. x :. x
SquareMatrix . A.fromList sh <$> vectorOf (x*x) arbitrary SquareMatrix . A.fromList sh <$> vectorOf (x*x) arbitrary
shrink = map (SquareMatrix) . sliceArray . _SquareMatrix shrink = map SquareMatrix . sliceArray . _SquareMatrix
-- | An alternative matrix datatype specifically for cooccurence matrices. This
-- is conceptually a subtype of `SquareMatrix`, but the `Arbitrary` instance
-- differs so that generated values look more plausible as cooccurence matrices.
newtype CoocMatrix = CoocMatrix { _CoocMatrix :: Matrix Int }
deriving newtype (Show, Eq)
instance Arbitrary CoocMatrix where
-- | We simulate the creation of a cooccurence matrix: there are `numContexts`
-- "virtual contexts" and `numTerms` "virtual terms"; for each context and
-- each term, the term has probability `probAppearance` to appear in the
-- context. The generated matrix is the cooccurence matrix of the resulting
-- "virtual corpus"
arbitrary = do
numContexts <- choose (1, 30)
numTerms <- choose (1, 30)
probAppearance <- (choose (0, 1) :: Gen Double)
-- `appearances` is a list of lists of integers encoding which virtual terms
-- appear in which virtual contexts. More specifically, the integer j
-- appears in the i-th list iff the virtual term j appears in the i-th
-- virtual context.
appearances <- replicateM numContexts $
-- In a given virtual context, iterate over all terms and pick whether
-- they appear in the context with probability `probAppearance`
foldM (\termsSoFar candidateTerm -> do
randomVar <- choose (0, 1)
return $ if randomVar < probAppearance then candidateTerm : termsSoFar
else termsSoFar
) [] [1..numTerms]
let indexMatrix = [1..numTerms] <&> (\i ->
[1..numTerms] <&> (\j ->
(i, j) ))
-- For each pair of virtual terms, iterate over all virtual contexts and add
-- 1 each time both term appear in the context:
let coocMatrix = (fmap . fmap) (\(i, j) ->
foldr (\contextTerms currentCoocCount ->
if i `elem` contextTerms && j `elem` contextTerms
then currentCoocCount + 1
else currentCoocCount
) 0 appearances
) indexMatrix
return $ CoocMatrix
$ A.fromList (Z :. numTerms :. numTerms) -- turn into Accelerate array
$ concat -- flatten
$ coocMatrix
type TermDivNanShape = Z :. Int :. Int type TermDivNanShape = Z :. Int :. Int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment