Commit 7fe6bf9e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] types for cooc function before refactoring.

parent affb8b80
...@@ -22,6 +22,8 @@ Main specifications to index a corpus with a term list ...@@ -22,6 +22,8 @@ Main specifications to index a corpus with a term list
module Main where module Main where
import Data.Maybe (catMaybes)
import Data.Text (pack)
import qualified Data.Vector as DV import qualified Data.Vector as DV
import qualified Data.Maybe as DMaybe import qualified Data.Maybe as DMaybe
...@@ -44,6 +46,7 @@ import Gargantext.Prelude ...@@ -44,6 +46,7 @@ import Gargantext.Prelude
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Text.Terms import Gargantext.Text.Terms
import Gargantext.Text.Context
import Gargantext.Text.Terms.WithList import Gargantext.Text.Terms.WithList
import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year) import Gargantext.Text.Parsers.CSV (readCsv, csv_title, csv_abstract, csv_publication_year)
import Gargantext.Text.List.CSV (csvGraphTermList) import Gargantext.Text.List.CSV (csvGraphTermList)
...@@ -105,3 +108,23 @@ main = do ...@@ -105,3 +108,23 @@ main = do
r <- mapConcurrently (filterTermsAndCooc patterns) (DM.toList corpus) r <- mapConcurrently (filterTermsAndCooc patterns) (DM.toList corpus)
putStrLn $ show r putStrLn $ show r
--writeFile outputFile cooc --writeFile outputFile cooc
testCooc = do
let patterns = WithList $ buildPatterns testTermList
mapM (\x -> {-log "work" >>-} terms patterns x) $ catMaybes $ map (head . snd) testCorpus
--mapConcurrently (filterTermsAndCooc patterns) testCorpus
testCorpus :: [(Int, [Text])]
testCorpus = [ (1998, [pack "The beees"])
, (1999, [ pack "The bees and the flowers"
--, pack "The bees and the flowers"
])
]
testTermList :: TermList
testTermList = [ ([pack "bee"], [[pack "bees"]])
, ([pack "flower"], [[pack "flowers"]])
]
...@@ -28,13 +28,13 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence ...@@ -28,13 +28,13 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
module Gargantext.Text.Metrics.Count module Gargantext.Text.Metrics.Count
where where
import Data.Text (Text)
import Control.Arrow (Arrow(..), (***)) import Control.Arrow (Arrow(..), (***))
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map.Strict as DMS import qualified Data.Map.Strict as DMS
import Data.Map.Strict ( Map, empty, singleton import Data.Map.Strict ( Map, empty, singleton
, insertWith, unionWith , insertWith, unionWith, unionsWith
, mapKeys , mapKeys
) )
import Data.Set (Set) import Data.Set (Set)
...@@ -108,16 +108,29 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList ...@@ -108,16 +108,29 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
-} -}
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as coocOn f as = DMS.unionsWith (+) $ map (coocOn' f) as
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' fun ts = DMS.fromListWith (+) xs
where where
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs ts' = List.nub $ map fun ts
coocOn' fun ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
coocOnContexts :: (a -> [Text]) -> [[a]] -> Map ([Text], [Text]) Int
coocOnContexts fun = DMS.fromListWith (+) . List.concat . map (coocOnSingleContext fun)
coocOnSingleContext :: (a -> [Text]) -> [a] -> [(([Text], [Text]), Int)]
coocOnSingleContext fun ts = xs
where where
ts' = List.nub $ map fun ts ts' = List.nub $ map fun ts
xs = [ ((x, y), 1) xs = [ ((x, y), 1)
| x <- ts' | x <- ts'
, y <- ts' , y <- ts'
-- , x /= y , x >= y
] ]
...@@ -131,6 +144,6 @@ occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a ...@@ -131,6 +144,6 @@ occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a
-- TODO add groups and filter stops -- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = foldl' (unionWith (+)) empty xs sumOcc xs = unionsWith (+) xs
...@@ -47,6 +47,10 @@ import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList) ...@@ -47,6 +47,10 @@ import Gargantext.Text.Terms.WithList (Patterns, extractTermsWithList)
data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns data TermType lang = Mono lang | Multi lang | MonoMulti lang | WithList Patterns
group :: [Text] -> [Text]
group = undefined
-- remove Stop Words -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ -- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment