Prelude.hs 9.23 KB
Newer Older
1 2
{-|
Module      : Gargantext.Prelude
3
Description : Specific Prelude of the project
4 5 6 7 8 9 10 11
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

12 13 14 15 16 17
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# OPTIONS_GHC -fno-warn-type-defaults  #-}

module Gargantext.Prelude
  ( module Gargantext.Prelude
  , module Protolude
18
  , headMay, lastMay
19
  , module GHC.Err.Located
20 21
  , module Text.Show
  , module Text.Read
22
  , cs
23
  , module Data.Maybe
24
  , round
25
  , sortWith
26
  , module Prelude
27
  , MonadBase(..)
28
  , Typeable
29 30 31
  )
  where

32
import Control.Monad.Base (MonadBase(..))
33
import GHC.Exts (sortWith)
34
import GHC.Err.Located (undefined)
35
import GHC.Real (round)
36
import Data.Map (Map, lookup)
37
import Data.Maybe (isJust, fromJust, maybe)
38
import Data.Text (Text)
39
import Data.Typeable (Typeable)
40
import Protolude ( Bool(True, False), Int, Int64, Double, Integer
41
                 , Fractional, Num, Maybe(Just,Nothing)
42
                 , Enum, Bounded, Float
43
                 , Floating, Char, IO
44
                 , pure, (>>=), (=<<), (<*>), (<$>), (<&>), (>>)
45
                 , head, flip
46
                 , Ord, Integral, Foldable, RealFrac, Monad, filter
47
                 , reverse, map, mapM, zip, drop, take, zipWith
48
                 , sum, fromIntegral, length, fmap, foldl, foldl'
49
                 , takeWhile, sqrt, identity
50
                 , abs, min, max, maximum, minimum, return, snd, truncate
51
                 , (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
52
                 , Eq, (==), (>=), (<=), (<>), (/=)
53
                 , (&&), (||), not, any, all
54
                 , concatMap
55
                 , fst, snd, toS
56
                 , elem, die, mod, div, const, either
57
                 , curry, uncurry, repeat
58
                 , otherwise, when
59
                 , IO()
60
                 , compare
61
                 , on
62
                 , panic
63
                 , seq
64 65
                 )

66
import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
67 68 69 70 71
-- TODO import functions optimized in Utils.Count
-- import Protolude hiding (head, last, all, any, sum, product, length)
-- import Gargantext.Utils.Count
import qualified Data.List     as L hiding (head, sum)
import qualified Control.Monad as M
72
import qualified Data.Map      as M
73
import Data.Map.Strict (insertWith)
74
import qualified Data.Vector as V
75
import Safe (headMay, lastMay, initMay, tailMay)
76 77
import Text.Show (Show(), show)
import Text.Read (Read())
78 79
import Data.String.Conversions (cs)

80

81 82
printDebug :: (Show a, MonadBase IO m) => [Char] -> a -> m ()
printDebug msg x = liftBase . putStrLn $ msg <> " " <> show x
83 84 85
-- printDebug _ _ = pure ()


86 87
-- | splitEvery n == chunkAlong n n
splitEvery :: Int -> [a] -> [[a]]
88
splitEvery _ [] = []
89 90 91
splitEvery n xs =
  let (h,t) = L.splitAt n xs
  in  h : splitEvery n t
92

93 94 95
type Grain = Int
type Step  = Int

96
-- | Function to split a range into chunks
97
-- if   step == grain then linearity (splitEvery)
98 99
-- elif step < grain then overlapping
-- else dotted with holes
100 101 102 103 104 105
-- TODO FIX BUG if Steps*Grain /= length l
-- chunkAlong 10 10 [1..15] == [1..10]
-- BUG: what about the rest of (divMod 15 10)?
-- TODO: chunkAlongNoRest or chunkAlongWithRest
-- default behavior: NoRest

106
chunkAlong :: Eq a => Grain -> Step -> [a] -> [[a]]
107 108 109
chunkAlong a b l = case a >= length l of
  True  -> [l]
  False -> chunkAlong' a b l
110

111 112 113 114 115 116 117 118 119 120 121
chunkAlong' :: Eq a => Grain -> Step -> [a] -> [[a]]
chunkAlong' a b l = case a > 0 && b > 0 of
  True  -> chunkAlong'' a b l
  False -> panic "ChunkAlong: Parameters should be > 0 and Grain > Step"

chunkAlong'' :: Eq a => Int -> Int -> [a] -> [[a]]
chunkAlong'' a b l = filter (/= []) $ only (while dropAlong)
  where
    only      = map       (take a)
    while     = takeWhile (\x -> length x >= a)
    dropAlong = L.scanl   (\x _y -> drop b x) l ([1..] :: [Integer])
122 123

-- | Optimized version (Vector)
124 125 126 127 128 129
chunkAlongV :: Int -> Int -> V.Vector a -> V.Vector (V.Vector a)
chunkAlongV a b l = only (while  dropAlong)
  where
    only      = V.map       (V.take a)
    while     = V.takeWhile (\x -> V.length x >= a)
    dropAlong = V.scanl     (\x _y -> V.drop b x) l (V.fromList [1..])
130 131

-- | TODO Inverse of chunk ? unchunkAlong ?
132 133
-- unchunkAlong :: Int -> Int -> [[a]] -> [a]
-- unchunkAlong = undefined
134 135 136 137 138 139


-- splitAlong [2,3,4] ("helloworld" :: [Char]) == ["he", "llo", "worl", "d"]
splitAlong :: [Int] -> [Char] -> [[Char]]
splitAlong _ [] = [] -- No list? done
splitAlong [] xs = [xs] -- No place to split at? Return the remainder
140 141
splitAlong (x:xs) ys = take x ys : splitAlong xs (drop x ys)
-- take until our split spot, recurse with next split spot and list remainder
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169

takeWhileM :: (Monad m) => (a -> Bool) -> [m a] -> m [a]
takeWhileM _ [] = return []
takeWhileM p (a:as) = do
    v <- a
    if p v
        then do
            vs <- takeWhileM p as
            return (v:vs)
        else return []

-- SUMS
-- To select the right algorithme according to the type:
-- https://github.com/mikeizbicki/ifcxt

sumSimple :: Num a => [a] -> a
sumSimple = L.foldl' (+) 0

-- | https://en.wikipedia.org/wiki/Kahan_summation_algorithm
sumKahan :: Num a => [a] -> a
sumKahan = snd . L.foldl' go (0,0)
    where
        go (c,t) i = ((t'-t)-y,t')
            where
                y  = i-c
                t' = t+y

-- | compute part of the dict
170 171
count2map :: (Ord k, Foldable t) => t k -> Map k Double
count2map xs = M.map (/ (fromIntegral (length xs))) (count2map' xs)
172 173

-- | insert in a dict
174 175
count2map' :: (Ord k, Foldable t) => t k -> Map k Double
count2map' xs = L.foldl' (\x y -> insertWith (+) y 1 x) M.empty xs
176 177 178 179 180 181 182

trunc :: (RealFrac a, Integral c, Integral b) => b -> a -> c
trunc n = truncate . (* 10^n)

trunc' :: Int -> Double -> Double
trunc' n x = fromIntegral $ truncate $ (x * 10^n)

183 184 185 186
------------------------------------------------------------------------
bool2num :: Num a => Bool -> a
bool2num True  = 1
bool2num False = 0
187 188

bool2double :: Bool -> Double
189
bool2double = bool2num
190

191 192 193
bool2int :: Bool -> Int
bool2int = bool2num
------------------------------------------------------------------------
194 195 196 197 198 199

-- Normalizing && scaling data
scale :: [Double] -> [Double]
scale = scaleMinMax

scaleMinMax :: [Double] -> [Double]
200
scaleMinMax xs = map (\x -> (x - mi / (ma - mi + 1) )) xs'
201 202 203
    where
        ma  = maximum xs'
        mi  = minimum xs'
204
        xs' = map abs xs
205 206

scaleNormalize :: [Double] -> [Double]
207
scaleNormalize xs = map (\x -> (x - v / (m + 1))) xs'
208
    where
209 210
        v   = variance  xs'
        m   = mean      xs'
211
        xs' = map abs   xs
212 213 214 215 216

normalize :: [Double] -> [Double]
normalize as = normalizeWith identity as

normalizeWith :: Fractional b => (a -> b) -> [a] -> [b]
217
normalizeWith extract bs = map (\x -> x/(sum bs')) bs'
218
    where
219
        bs' = map extract bs
220 221 222 223 224 225 226

-- Zip functions to add
zipFst :: ([b] -> [a]) -> [b] -> [(a, b)]
zipFst  f xs = zip (f xs) xs

zipSnd :: ([a] -> [b]) -> [a] -> [(a, b)]
zipSnd f xs = zip xs (f xs)
227

228 229
-- | maximumWith
maximumWith :: (Ord a1, Foldable t) => (a2 -> a1) -> t a2 -> a2
230
maximumWith f = L.maximumBy (compare `on` f)
231

232 233
-- | To get all combinations of a list with no
-- repetition and apply a function to the resulting list of pairs
234 235 236
listToCombi :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToCombi f l = [ (f x, f y) | (x:rest) <- L.tails l,  y <- rest ]

237 238 239 240 241 242 243 244 245 246
------------------------------------------------------------------------
-- Empty List Sugar Error Handling
-- TODO add Garg Monad Errors

listSafe1 :: Text -> ([a] -> Maybe a)
          -> Text -> [a] -> a
listSafe1 s f e xs = maybe (panic $ h <> e) identity (f xs)
  where
    h = "[ERR][Gargantext] Empty list for " <> s <> " in "

247
head' :: Text -> [a] -> a
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
head' = listSafe1 "head" headMay

last' :: Text -> [a] -> a
last' = listSafe1 "last" lastMay

------------------------------------------------------------------------

listSafeN :: Text -> ([a] -> Maybe [a])
          -> Text -> [a] -> [a]
listSafeN s f e xs = maybe (panic $ h <> e) identity (f xs)
  where
    h = "[ERR][Gargantext] Empty list for " <> s <> " in "

tail' :: Text -> [a] -> [a]
tail' = listSafeN "tail" tailMay

init' :: Text -> [a] -> [a]
init' = listSafeN "init" initMay

267
------------------------------------------------------------------------
268 269 270 271 272 273 274 275 276 277
--- Some Statistics sugar functions
-- Exponential Average
eavg :: [Double] -> Double
eavg (x:xs) = a*x + (1-a)*(eavg xs)
  where a = 0.70
eavg [] = 0

-- Simple Average
mean :: Fractional a => [a] -> a
mean xs = sum xs / fromIntegral (length xs)
278

279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
sumMaybe :: Num a => [Maybe a] -> Maybe a
sumMaybe = fmap sum . M.sequence

variance :: Floating a => [a] -> a
variance xs = sum ys  / (fromIntegral (length xs) - 1)
  where
    m = mean xs
    ys = map (\x -> (x - m) ** 2) xs

deviation :: Floating a => [a] -> a
deviation = sqrt . variance

movingAverage :: (Eq b, Fractional b) => Int -> [b] -> [b]
movingAverage steps xs = map mean $ chunkAlong steps 1 xs

-----------------------------------------------------------------------
295 296 297 298 299 300 301 302 303 304 305 306 307 308
-----------------------------------------------------------------------
--- Map in Map = Map2
-- To avoid Map (a,a) b
type Map2 a b = Map a (Map a b)

lookup2 :: Ord a
        => a
        -> a
        -> Map2 a b
        -> Maybe b
lookup2 a b m = do
  m' <- lookup a m
  lookup b m'

309 310 311 312 313 314 315 316
-----------------------------------------------

foldM' :: (Monad m) => (a -> b -> m a) -> a -> [b] -> m a
foldM' _ z [] = return z
foldM' f z (x:xs) = do
  z' <- f z x
  z' `seq` foldM' f z' xs

317