Commit 6f66c3dc authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DOC] adding a file for examples, text ngrams extraction and some metrics (begin).

parent fcec7770
......@@ -33,6 +33,7 @@ library:
- Gargantext.Text.Context
- Gargantext.Text.List.CSV
- Gargantext.Text.Metrics
- Gargantext.Text.Metrics.Examples
- Gargantext.Text.Metrics.Count
- Gargantext.Text.Metrics.CharByChar
- Gargantext.Text.Parsers.CSV
......
......@@ -24,22 +24,9 @@ import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList, Matrix)
--import Math.KMeans (kmeans, euclidSq, elements)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics.Count (Grouped)
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
......@@ -121,90 +108,3 @@ coocScored m = zipWith (\(_,t) (inc,spe) -> Scored t inc spe) (M.toList fi) scor
(ti,fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList $ DAA.run $ DAA.zip (DAA.use is) (DAA.use ss)
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
where
(ti,fi) = createIndices m
ordonne x = sortWith (Down . snd) $ zip (map snd $ M.toList fi) (toList x)
metrics_text :: Text
metrics_text = T.intercalate " " metrics_sentences
metrics_sentences' :: [Text]
metrics_sentences' = splitBy (Sentences 0) metrics_text
-- | Sentences
metrics_sentences :: [Text]
metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences
-- >>> metrics_terms
-- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text
-- | Occurrences
{-
fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["object"],fromList [(["object"], 3 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ :: IO (Map Grouped (Map Terms Int))
metrics_occ = occurrences <$> L.concat <$> metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
,((["glas"],["spoon"]),4)
,((["glas"],["table"]),6)
,((["object"],["spoon"]),6)
,((["object"],["table"]),9)
,((["spoon"],["table"]),6)]
-}
metrics_cooc :: IO (Map (Label, Label) Int)
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen mat_cooc
)
metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
{-|
Module : Gargantext.Text.Metrics.Examples
Description : Minimal Examples to test behavior of the functions.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This file is intended for these purposes:
- documentation for teaching and research
- behavioral tests (that should be completed with uni-tests and scale-tests
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.Examples
where
import Data.Ord (Down(..))
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import Data.Tuple.Extra (both)
import Data.Array.Accelerate (toList, Matrix)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics.Count (Grouped)
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
import qualified Data.Array.Accelerate as DAA
-- | From list to simple text
--
-- >>> metrics_text
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
metrics_text :: Text
metrics_text = T.intercalate " " metrics_sentences
-- | Sentences
--
-- >>> metrics_sentences
-- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
metrics_sentences :: [Text]
metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == splitBy (Sentences 0) metrics_text
-- | Terms reordered to visually check occurrences
-- Split text by sentence and then extract ngrams.
--
-- >>> metrics_terms
-- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text
-- | Test the Occurrences
--
-- >>> metrics_occ
-- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
metrics_occ :: IO (Map Grouped (Map Terms Int))
metrics_occ = occurrences <$> L.concat <$> metrics_terms
-- | Test the cooccurrences
--
-- >>> metrics_cooc
-- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
metrics_cooc :: IO (Map (Label, Label) Int)
metrics_cooc = cooc <$> metrics_terms
-- | Tests
metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen mat_cooc
)
metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
where
(ti,fi) = createIndices m
ordonne x = sortWith (Down . snd) $ zip (map snd $ M.toList fi) (toList x)
{-|
Module : Gargantext.Text.Metrics.Freq
Description :
Description : Some functions to count.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment