Commit 6551bf90 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[COUNT] renaming file and generic function.

parent c34120e3
...@@ -30,7 +30,7 @@ import Gargantext.Prelude ...@@ -30,7 +30,7 @@ import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map) import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional) import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional)
import Gargantext.Viz.Graph.Index (Index) import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Occurrences (cooc, removeApax) import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms) import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
......
...@@ -27,7 +27,7 @@ import NLP.FullStop (segment) ...@@ -27,7 +27,7 @@ import NLP.FullStop (segment)
----------------------------------------------------------------- -----------------------------------------------------------------
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Text.Metrics.Occurrences (Occ, occurrences, cooc) import Gargantext.Text.Metrics.Count (Occ, occurrences, cooc)
import Gargantext.Prelude hiding (filter) import Gargantext.Prelude hiding (filter)
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -15,6 +15,7 @@ Mainly reexport functions in @Data.Text.Metrics@ ...@@ -15,6 +15,7 @@ Mainly reexport functions in @Data.Text.Metrics@
module Gargantext.Text.Metrics where module Gargantext.Text.Metrics where
import Data.Text (Text, pack) import Data.Text (Text, pack)
import qualified Data.Text as T
import Data.List (concat) import Data.List (concat)
--import GHC.Real (Ratio) --import GHC.Real (Ratio)
...@@ -22,7 +23,7 @@ import Data.List (concat) ...@@ -22,7 +23,7 @@ import Data.List (concat)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.Occurrences (occurrences, cooc) import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(Multi), terms) import Gargantext.Text.Terms (TermType(Multi), terms)
import Gargantext.Core (Lang(EN)) import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
...@@ -33,29 +34,35 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) ...@@ -33,29 +34,35 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
metrics_text :: Text metrics_text :: Text
metrics_text = "A table is an object. A glas is an object. The glas is on the table. The spoon is an object. The spoon is on the table." metrics_text = T.concat ["A table is an object."
,"A glas is an object too."
,"Using a glas to dring is a function."
,"Using a spoon to eat is a function."
,"The spoon is an object to eat."
]
metrics_sentences' :: [Text]
metrics_sentences' = splitBy (Sentences 0) metrics_text
-- | Sentences -- | Sentences
metrics_sentences :: [Text] metrics_sentences :: [Text]
metrics_sentences = [ "A table is an object." metrics_sentences = ["A table is an object."
, "A glas is an object." ,"A glas is an object too."
, "The glas is on the table." ,"The glas and the spoon are on the table."
, "The spoon is an object." ,"The spoon is an object to eat."
, "The spoon is on the table." ,"The spoon is on the table and the plate and the glas."]
]
metrics_sentences_Test = splitBy (Sentences 0) metrics_text == metrics_sentences metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences -- | Terms reordered to visually check occurrences
metrics_terms :: [[[Text]]] metrics_terms :: [[Text]]
metrics_terms = [[["table"],["object"] ] metrics_terms = undefined
,[ ["object"],["glas"] ]
,[["table"], ["glas"] ] metrics_terms' :: IO [[Terms]]
,[ ["object"], ["spoon"]] metrics_terms' = mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text
,[["table"], ["spoon"]]
] --metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
--metrics_terms_Test = (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) == metrics_terms
-- | Occurrences -- | Occurrences
{- {-
...@@ -78,5 +85,3 @@ metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object ...@@ -78,5 +85,3 @@ metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object
{-| {-|
Module : Gargantext.Text.Metrics.Occurrences Module : Gargantext.Text.Metrics.Count
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -25,7 +25,7 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence ...@@ -25,7 +25,7 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.Occurrences module Gargantext.Text.Metrics.Count
where where
...@@ -71,7 +71,6 @@ type Grouped = Stems ...@@ -71,7 +71,6 @@ type Grouped = Stems
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"] --λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)] --fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
---- ----
-} -}
type Occs = Int type Occs = Int
...@@ -81,10 +80,16 @@ removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int ...@@ -81,10 +80,16 @@ removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int
removeApax = DMS.filter (> 1) removeApax = DMS.filter (> 1)
cooc :: [[Terms]] -> Map (Label, Label) Int cooc :: [[Terms]] -> Map (Label, Label) Int
cooc tss = cooc tss = coocOnWithLabel _terms_stem (labelPolicy terms_occs) tss
mapKeys (delta $ labelPolicy terms_occs) $ coocOn _terms_stem tss
where where
terms_occs = occurrencesOn _terms_stem (List.concat tss) terms_occs = occurrencesOn _terms_stem (List.concat tss)
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on policy tss =
mapKeys (delta policy) $ coocOn on tss
where
delta f = f *** f delta f = f *** f
...@@ -95,16 +100,16 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList ...@@ -95,16 +100,16 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
where where
ts' = List.nub $ map f ts coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
xs = [ ((x, y), 1) coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
| x <- ts' where
, y <- ts' ts' = List.nub $ map f ts
, x < y xs = [ ((x, y), 1)
] | x <- ts'
, y <- ts'
, x < y
]
-- | Compute the grouped occurrences (occ) -- | Compute the grouped occurrences (occ)
...@@ -115,6 +120,7 @@ occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int) ...@@ -115,6 +120,7 @@ occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
-- TODO add groups and filter stops -- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = foldl' (unionWith (+)) empty xs sumOcc xs = foldl' (unionWith (+)) empty xs
......
...@@ -46,7 +46,7 @@ import Data.Maybe (Maybe(Just)) ...@@ -46,7 +46,7 @@ import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr import qualified Data.Array.Accelerate.Array.Representation as Repr
import Gargantext.Text.Metrics.Occurrences import Gargantext.Text.Metrics.Count
----------------------------------------------------------------------- -----------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment