-- Count.hs
{-# LANGUAGE OverloadedStrings #-}

module Gargantext.Ngrams.Count where

import Gargantext.Prelude

import Data.Foldable as F

import Data.Map.Strict (insertWith)
import Data.Map (Map)
import qualified Data.Map as M

--import qualified Data.Text.Lazy.IO as DTLIO
import qualified Data.Text.Lazy as DTL

-- | /O(n)/ Split a lazy 'DTL.Text' into a list of one-character texts.
--
-- Implemented with 'DTL.chunksOf' and a chunk size of 1. The original author
-- listed the variants in this module (@letters@, @letters'@, @letters''@)
-- "from slower to faster"; that ordering has not been re-measured here.
letters :: DTL.Text -> [DTL.Text]
letters = DTL.chunksOf 1

-- | Split a lazy 'DTL.Text' into a list of one-character texts.
--
-- Every character of the input, in order, becomes its own single-character
-- text; empty input yields the empty list.
--
-- This deliberately avoids a separator-based split: interspersing a sentinel
-- character such as @\'#\'@ and splitting on it gives wrong results whenever
-- the input already contains that character, and yields @[\"\"]@ instead of
-- @[]@ for empty input.
letters' :: DTL.Text -> [DTL.Text]
letters' text = [DTL.singleton ch | ch <- DTL.unpack text]

-- | Split a lazy 'DTL.Text' into one-character texts using a right fold:
-- each character is wrapped with 'DTL.singleton' and consed onto the
-- result built from the remaining characters.
letters'' :: DTL.Text -> [DTL.Text]
letters'' text = DTL.foldr prepend [] text
  where
    -- Turn one character into a text and put it in front of the rest.
    prepend c rest = DTL.singleton c : rest


-- words
-- lines
-- words between punctuation
-- number of punctuation

-- | Count how many times each distinct element occurs in a list.
--
-- The result maps every element that appears in the input to its number of
-- occurrences; an empty list gives an empty map.
occurrences :: Ord a => [a] -> Map a Int
occurrences = foldl' bump M.empty
  where
    -- 'insertWith' is imported from "Data.Map.Strict", so each count is
    -- forced as it is updated: add 1 to an existing entry, or start at 1.
    bump counts key = insertWith (+) key 1 counts

-- For optimization:
--occurrences' :: Ord a => [a] -> Map a Integer
--occurrences' xs = DTL.foldl (\x y -> M.insertWith' (+) y 1 x) M.empty xs

--countMain :: IO ()
--countMain = do
--  (fichier:_) <- getArgs
--  c <- DTLIO.readFile fichier
--  --print $ occurrences $ DTL.chunksOf 1 c
--  pure $ occurrences $ letters'' c
--  --print $ occurrences $ DTL.words $ DTL.toLower c
--