Examples.hs 5.24 KB
Newer Older
1
{-|
2
Module      : Gargantext.Core.Text.Examples
3 4 5 6 7 8 9 10 11 12
Description : Minimal Examples to test behavior of the functions.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

This file is intended for these purposes:

- documentation for teaching and research
13
- learn basics of Haskell which is a scientific programming language
14
- behavioral tests (that should be completed with uni-tests and scale-tests)
15

16
This document defines basic of Text definitions according to Gargantext..
17 18 19 20 21

- What is a term ?
- What is a sentence ?
- What is a paragraph ?

22 23 24 25
-}

{-# LANGUAGE BangPatterns      #-}

Alexandre Delanoë's avatar
Alexandre Delanoë committed
26
module Core.Text.Examples
27 28
  where

Alexandre Delanoë's avatar
Alexandre Delanoë committed
29
{-
30
import Data.Array.Accelerate (toList, Matrix)
31
import Data.Map (Map)
32
import Data.Ord (Down(..))
33 34 35
import Data.Text (Text)
import Data.Tuple.Extra (both)
import Gargantext.Core (Lang(EN))
36
import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
37 38
import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Core.Text.Metrics.Count (Grouped)
39 40 41
import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core.Types (Terms(..), Label)
42
import Gargantext.Core.Viz.Graph.Index
43
import Gargantext.Prelude
44
import qualified Data.Array.Accelerate as DAA
45 46 47
import qualified Data.List as List
import qualified Data.Map  as Map
import qualified Data.Text as Text
48 49

-- | Sentences
50
-- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
51
--
52
-- >>> ex_sentences
53
-- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
ex_sentences :: [Text]
ex_sentences = [ "There is a table with a glass of wine and a spoon."
            , "I can see the glass on the table."
            , "There was only a spoon on that table."
            , "The glass just fall from the table, pouring wine everywhere."
            , "I wish the glass did not contain wine."
            ]


-- | From list to simple text as paragraph.
-- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
--
-- >>> T.intercalate (T.pack " ") ex_sentences
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
ex_paragraph :: Text
69
ex_paragraph = Text.intercalate " " ex_sentences
70

71
-- | Let split sentences by Contexts of text.
72
-- More about 'Gargantext.Core.Text.Context'
73 74 75
--
-- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
-- True
76 77 78 79

-- | Terms reordered to visually check occurrences
-- Split text by sentence and then extract ngrams.
--
80
-- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
81
-- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
82 83
ex_terms :: IO [[Terms]]
ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
84 85 86

-- | Test the Occurrences
--
87
-- >>> occurrences <$> List.concat <$> ex_terms
88
-- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
89
ex_occ :: IO (Map Grouped (Map Terms Int))
90
ex_occ = occurrences <$> List.concat <$> ex_terms
91 92

-- | Test the cooccurrences
93
-- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
94
--
95
-- >>> cooc <$> ex_terms
96
-- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
97 98
ex_cooc :: IO (Map (Label, Label) Int)
ex_cooc = cooc <$> ex_terms
99

100 101 102 103 104 105 106 107 108 109 110 111
-- | Tests the specificity and genericity
--
-- >>> ex_cooc_mat
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4) 
--   [ 4, 0, 0, 0,
--     1, 2, 0, 0,
--     3, 2, 4, 0,
--     3, 1, 2, 3],Matrix (Z :. 4 :. 4) 
--   [ 1.0, 0.25, 0.75, 0.75,
--     0.0,  1.0,  1.0,  0.5,
--     0.0,  0.0,  1.0,  0.5,
--     0.0,  0.0,  0.0,  1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
112
ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector GenericityInclusion, DAA.Vector SpecificityExclusion))
113 114
ex_cooc_mat = do
  m <- ex_cooc
115
  let (ti,_) = createIndices m
116
  let mat_cooc = cooc2mat Triangle ti m
117 118 119 120 121 122
  pure ( ti
       , mat_cooc
       , incExcSpeGen_proba  mat_cooc
       , incExcSpeGen        mat_cooc
       )

123 124
ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
125 126

incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
127
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat Triangle ti m)
128 129
  where
    (ti,fi) = createIndices m
130
    ordonne x = sortWith (Down . snd)
131
              $ zip (map snd $ Map.toList fi) (toList x)
132

Alexandre Delanoë's avatar
Alexandre Delanoë committed
133
-}