Commit 7fe6bf9e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] types for cooc function before refactoring.

parent affb8b80
......@@ -22,6 +22,8 @@ Main specifications to index a corpus with a term list
module Main where
import Data.Maybe (catMaybes)
import Data.Text (pack)
import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe
......@@ -44,6 +46,7 @@ import Gargantext.Prelude
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Terms
import Gargantext.Text.Context
import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList)
......@@ -78,7 +81,7 @@ filterTermsAndCooc patterns (year, ts) = do
pure r
where
log m = do
tid <- myThreadId
tid <- myThreadId
(p, _) <- threadCapability tid
putStrLn . unwords $
["filterTermsAndCooc:", m, show year, "on proc", show p]
......@@ -105,3 +108,23 @@ main = do
r <- mapConcurrently (filterTermsAndCooc patterns) (DM.toList corpus)
putStrLn $ show r
--writeFile outputFile cooc
testCooc = do
let patterns = WithList $ buildPatterns testTermList
mapM (\x -> {-log "work" >>-} terms patterns x) $ catMaybes $ map (head . snd) testCorpus
--mapConcurrently (filterTermsAndCooc patterns) testCorpus
testCorpus :: [(Int, [Text])]
testCorpus = [ (1998, [pack "The beees"])
, (1999, [ pack "The bees and the flowers"
--, pack "The bees and the flowers"
])
]
testTermList :: TermList
testTermList = [ ([pack "bee"], [[pack "bees"]])
, ([pack "flower"], [[pack "flowers"]])
]
......@@ -28,13 +28,13 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
module Gargantext.Text.Metrics.Count
where
import Data.Text (Text)
import Control.Arrow (Arrow(..), (***))
import qualified Data.List as List
import qualified Data.Map.Strict as DMS
import Data.Map.Strict ( Map, empty, singleton
, insertWith, unionWith
, insertWith, unionWith, unionsWith
, mapKeys
)
import Data.Set (Set)
......@@ -69,8 +69,8 @@ type Grouped = Stems
----
-}
type Occs = Int
type Coocs = Int
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map (Label, Label) Int -> Map (Label, Label) Int
......@@ -108,17 +108,30 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
-}
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
coocOn f as = DMS.unionsWith (+) $ map (coocOn' f) as
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' fun ts = DMS.fromListWith (+) xs
where
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
coocOnContexts :: (a -> [Text]) -> [[a]] -> Map ([Text], [Text]) Int
coocOnContexts fun = DMS.fromListWith (+) . List.concat . map (coocOnSingleContext fun)
coocOnSingleContext :: (a -> [Text]) -> [a] -> [(([Text], [Text]), Int)]
coocOnSingleContext fun ts = xs
where
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' fun ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
where
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
-- , x /= y
]
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
-- | Compute the grouped occurrences (occ)
......@@ -131,6 +144,6 @@ occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = foldl' (unionWith (+)) empty xs
sumOcc xs = unionsWith (+) xs
......@@ -47,6 +47,10 @@ import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList)
data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns
group :: [Text] -> [Text]
group = undefined
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment