Examples.hs 5.23 KB
Newer Older
1
{-|
2
Module      : Gargantext.Text.Examples
3 4 5 6 7 8 9 10 11 12
Description : Minimal Examples to test behavior of the functions.
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

This file is intended for these purposes:

- documentation for teaching and research
13
- learn basics of Haskell which is a scientific programming language
14
- behavioral tests (that should be completed with uni-tests and scale-tests)
15

16
This document defines basic of Text definitions according to Gargantext..
17 18 19 20 21

- What is a term ?
- What is a sentence ?
- What is a paragraph ?

22 23 24 25 26 27
-}

{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

28
module Gargantext.Text.Examples
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
  where

import Data.Ord (Down(..))
import qualified Data.List as L

import Data.Map (Map)
import qualified Data.Map  as M

import Data.Text (Text)
import qualified Data.Text as T

import Data.Tuple.Extra (both)
import Data.Array.Accelerate (toList, Matrix)

import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..), Label)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Metrics.Count (Grouped)
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index

import qualified Data.Array.Accelerate as DAA

-- | Sentences
56
-- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
57
--
58
-- >>> ex_sentences
59
-- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
ex_sentences :: [Text]
ex_sentences = [ "There is a table with a glass of wine and a spoon."
            , "I can see the glass on the table."
            , "There was only a spoon on that table."
            , "The glass just fall from the table, pouring wine everywhere."
            , "I wish the glass did not contain wine."
            ]


-- | From list to simple text as paragraph.
-- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
--
-- >>> T.intercalate (T.pack " ") ex_sentences
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
ex_paragraph :: Text
ex_paragraph = T.intercalate " " ex_sentences
76

77 78 79 80 81
-- | Let split sentences by Contexts of text.
-- More about 'Gargantext.Text.Context'
--
-- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
-- True
82 83 84 85

-- | Terms reordered to visually check occurrences
-- Split text by sentence and then extract ngrams.
--
86
-- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
87
-- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
88 89
ex_terms :: IO [[Terms]]
ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
90 91 92

-- | Test the Occurrences
--
93
-- >>> occurrences <$> L.concat <$> ex_terms
94
-- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
95 96
ex_occ :: IO (Map Grouped (Map Terms Int))
ex_occ = occurrences <$> L.concat <$> ex_terms
97 98

-- | Test the cooccurrences
99
-- Use the 'Gargantext.Text.Metrics.Count.cooc' function.
100
--
101
-- >>> cooc <$> ex_terms
102
-- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
103 104
ex_cooc :: IO (Map (Label, Label) Int)
ex_cooc = cooc <$> ex_terms
105

106 107 108 109 110 111 112 113 114 115 116 117
-- | Tests the specificity and genericity
--
-- >>> ex_cooc_mat
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4) 
--   [ 4, 0, 0, 0,
--     1, 2, 0, 0,
--     3, 2, 4, 0,
--     3, 1, 2, 3],Matrix (Z :. 4 :. 4) 
--   [ 1.0, 0.25, 0.75, 0.75,
--     0.0,  1.0,  1.0,  0.5,
--     0.0,  0.0,  1.0,  0.5,
--     0.0,  0.0,  0.0,  1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
118 119 120
ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
ex_cooc_mat = do
  m <- ex_cooc
121 122 123 124 125 126 127 128
  let (ti,_) = createIndices m
  let mat_cooc = cooc2mat ti m
  pure ( ti
       , mat_cooc
       , incExcSpeGen_proba  mat_cooc
       , incExcSpeGen        mat_cooc
       )

129 130
ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
131 132 133 134 135

incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
  where
    (ti,fi) = createIndices m
136 137
    ordonne x = sortWith (Down . snd)
              $ zip (map snd $ M.toList fi) (toList x)
138 139