[DOC+TESTS] contexts of texts.

16a85fb4 · Alexandre Delanoë · 198b4ef8 · 16a85fb4 · 16a85fb4 · 16a85fb4
Commit 16a85fb4 authored Aug 23, 2018 by Alexandre Delanoë
6 changed files
--- a/package.yaml
+++ b/package.yaml
@@ -25,26 +25,29 @@ library:
  exposed-modules:
  - Gargantext
  - Gargantext.API
+  - Gargantext.API.Auth
+  - Gargantext.API.Count
+  - Gargantext.API.FrontEnd
  - Gargantext.API.Orchestrator
  - Gargantext.API.Search
-  - Gargantext.API.FrontEnd
-  - Gargantext.API.Count
-  - Gargantext.API.Auth
  - Gargantext.API.Settings
  - Gargantext.Core
  - Gargantext.Core.Types
+  - Gargantext.Core.Types.Main
+  - Gargantext.Core.Types.Node
+  - Gargantext.Core.Utils.Prefix
  - Gargantext.Database
  - Gargantext.Prelude
  - Gargantext.Text
  - Gargantext.Text.Context
+  - Gargantext.Text.Examples
  - Gargantext.Text.List.CSV
  - Gargantext.Text.Metrics
-  - Gargantext.Text.Metrics.Examples
-  - Gargantext.Text.Metrics.Count
  - Gargantext.Text.Metrics.CharByChar
+  - Gargantext.Text.Metrics.Count
  - Gargantext.Text.Parsers.CSV
-  - Gargantext.Text.Parsers.WOS
  - Gargantext.Text.Parsers.Date
+  - Gargantext.Text.Parsers.WOS
  - Gargantext.Text.Search
  - Gargantext.Text.Terms
  - Gargantext.Text.Terms.WithList

--- a/src-doctest/Main.hs
+++ b/src-doctest/Main.hs
@@ -2,5 +2,5 @@ import System.FilePath.Glob
 import Test.DocTest

 main :: IO ()
-main = glob "src/Gargantext/Text/Metrics.hs" >>= doctest
+main = glob "src/Gargantext/Text/" >>= doctest

--- a/src/Gargantext/Prelude.hs
+++ b/src/Gargantext/Prelude.hs
@@ -30,6 +30,7 @@ module Gargantext.Prelude

 import GHC.Exts (sortWith)

+import Control.Monad.IO.Class (MonadIO)
 import Data.Maybe (isJust, fromJust, maybe)
 import Protolude ( Bool(True, False), Int, Int64, Double, Integer
                 , Fractional, Num, Maybe(Just,Nothing)
@@ -73,6 +74,11 @@ import Text.Read (Read())
 import Data.String.Conversions (cs)


+printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
+printDebug msg x = putStrLn $ msg <> " " <> show x
+-- printDebug _ _ = pure ()
+
+
 map2 :: (t -> b) -> [[t]] -> [[b]]
 map2 fun = map (map fun)


--- a/src/Gargantext/Text/Context.hs
+++ b/src/Gargantext/Text/Context.hs
 {-|
 Module      : Gargantext.Text.Context
-Description : 
+Description : How to manage contexts of texts ?
 Copyright   : (c) CNRS, 2017-Present
 License     : AGPL + CECILL v3
 Maintainer  : team@gargantext.org
 Stability   : experimental
 Portability : POSIX

-Context of text management tool, here are logic of main types.
+Context of text management tool, here are logic of main types:
+
+- Term
+- Multi-term
+- Label
+- Sentence
+- Corpus
+
+How to split contexts is describes in this module.

 -}

@@ -24,9 +32,7 @@ import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
 import Gargantext.Text
 import Gargantext.Prelude hiding (length)

-
 ------------------------------------------------------------------------
-
 type Term = Text
 type MultiTerm = [Term]
 type Label = MultiTerm
@@ -38,21 +44,25 @@ type Corpus    a = [Sentence a] -- a list of sentences

 -- type ConText a = [Sentence a]
 -- type Corpus a = [ConText a]
-
-
 ------------------------------------------------------------------------

+-- | Contexts definition to build/unbuild contexts.
 data SplitContext = Chars Int | Sentences Int | Paragraphs Int

 tag :: Text -> [Tag Text]
 tag = parseTags

 -- | splitBy contexts of Chars or Sentences or Paragraphs
-- >> splitBy (Chars 0) "abcde"
+-- To see some examples at a higher level (sentences and paragraph), see
+-- 'Gargantext.Text.Examples.ex_terms'
+--
+-- >>> splitBy (Chars 0) (pack "abcde")
 -- ["a","b","c","d","e"]
-- >> splitBy (Chars 1) "abcde"
+--
+-- >>> splitBy (Chars 1) (pack "abcde")
 -- ["ab","bc","cd","de"]
-- >> splitBy (Chars 2) "abcde"
+--
+-- >>> splitBy (Chars 2) (pack "abcde")
 -- ["abc","bcd","cde"]
 splitBy :: SplitContext -> Text -> [Text]
 splitBy (Chars     n)  = map pack        . chunkAlong (n+1) 1 . unpack

--- a/src/Gargantext/Text/Metrics/Examples.hs
+++ b/src/Gargantext/Text/Metrics/Examples.hs
 {-|
-Module      : Gargantext.Text.Metrics.Examples
+Module      : Gargantext.Text.Examples
 Description : Minimal Examples to test behavior of the functions.
 Copyright   : (c) CNRS, 2017 - present
 License     : AGPL + CECILL v3
@@ -10,15 +10,23 @@ Portability : POSIX
 This file is intended for these purposes:

 - documentation for teaching and research
+- learn basics of Haskell which is a scientific programming language
 - behavioral tests (that should be completed with uni-tests and scale-tests

+This documents defines basic of Text definitions according to Gargantext..
+
+- What is a term ?
+- What is a sentence ?
+- What is a paragraph ?
+
+
 -}

 {-# LANGUAGE BangPatterns      #-}
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE OverloadedStrings #-}

-module Gargantext.Text.Metrics.Examples
+module Gargantext.Text.Examples
  where

 import Data.Ord (Down(..))
@@ -45,56 +53,61 @@ import Gargantext.Viz.Graph.Index

 import qualified Data.Array.Accelerate as DAA

-
-- | From list to simple text
-- 
-- >>> metrics_text
-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
-metrics_text :: Text
-metrics_text = T.intercalate " " metrics_sentences
-
-
 -- | Sentences
+-- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
 --
-- >>> metrics_sentences
+-- >>> ex_sentences
 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
-metrics_sentences :: [Text]
-metrics_sentences = [ "There is a table with a glass of wine and a spoon."
-                    , "I can see the glass on the table."
-                    , "There was only a spoon on that table."
-                    , "The glass just fall from the table, pouring wine everywhere."
-                    , "I wish the glass did not contain wine."
-                    ]
+ex_sentences :: [Text]
+ex_sentences = [ "There is a table with a glass of wine and a spoon."
+            , "I can see the glass on the table."
+            , "There was only a spoon on that table."
+            , "The glass just fall from the table, pouring wine everywhere."
+            , "I wish the glass did not contain wine."
+            ]
+
+
+-- | From list to simple text as paragraph.
+-- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
+--
+-- >>> T.intercalate (T.pack " ") ex_sentences
+-- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
+ex_paragraph :: Text
+ex_paragraph = T.intercalate " " ex_sentences

-metrics_sentences_Test :: Bool
-metrics_sentences_Test = metrics_sentences == splitBy (Sentences 0) metrics_text
+-- | Let split sentences by Contexts of text.
+-- More about 'Gargantext.Text.Context'
+--
+-- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
+-- True

 -- | Terms reordered to visually check occurrences
 -- Split text by sentence and then extract ngrams.
 --
-- >>> metrics_terms
+-- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
-metrics_terms :: IO [[Terms]]
-metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text
+ex_terms :: IO [[Terms]]
+ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph

 -- | Test the Occurrences
 --
-- >>> metrics_occ
+-- >>> occurrences <$> L.concat <$> ex_terms
 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
-metrics_occ :: IO (Map Grouped (Map Terms Int))
-metrics_occ = occurrences <$> L.concat <$> metrics_terms
+ex_occ :: IO (Map Grouped (Map Terms Int))
+ex_occ = occurrences <$> L.concat <$> ex_terms

 -- | Test the cooccurrences
+-- Use the 'Gargantext.Text.Metrics.Count.cooc' function.
 --
-- >>> metrics_cooc
+-- >>> cooc <$> ex_terms
 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
-metrics_cooc :: IO (Map (Label, Label) Int)
-metrics_cooc = cooc <$> metrics_terms
+ex_cooc :: IO (Map (Label, Label) Int)
+ex_cooc = cooc <$> ex_terms

 -- | Tests 
-metrics_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
-metrics_cooc_mat = do
-  m <- metrics_cooc
+ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
+ex_cooc_mat = do
+  m <- ex_cooc
  let (ti,_) = createIndices m
  let mat_cooc = cooc2mat ti m
  pure ( ti
@@ -103,8 +116,8 @@ metrics_cooc_mat = do
       , incExcSpeGen        mat_cooc
       )

-metrics_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
-metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
+ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
+ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc

 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)

--- a/src/Gargantext/TextFlow.hs
+++ b/src/Gargantext/TextFlow.hs
@@ -21,7 +21,6 @@ import GHC.IO (FilePath)
 import qualified Data.Text as T
 import Data.Text.IO (readFile)

-import Control.Monad.IO.Class (MonadIO)

 import Data.Map.Strict (Map)
 import qualified Data.Array.Accelerate as A
@@ -53,9 +52,6 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode(..))
                 |___/
 -}

-printDebug :: (Show a, MonadIO m) => [Char] -> a -> m ()
-printDebug msg x = putStrLn $ msg <> " " <> show x
-- printDebug _ _ = pure ()

 data TextFlow = CSV FilePath
              | FullText FilePath
@@ -97,7 +93,7 @@ textFlow' termType contexts = do
  let myCooc2 = M.filter (>1) myCooc1
  printDebug "myCooc2" (M.size myCooc2)

-  -- Filtering terms with inclusion/Exclusion and Specifity/Genericity scores
+  -- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
  let myCooc3 = filterCooc ( FilterConfig (MapListSize    100 )
                                          (InclusionSize  400 )
                                          (SampleBins      10 )
@@ -105,7 +101,7 @@ textFlow' termType contexts = do
                                          (DefaultValue     0 )
                           ) myCooc2
  printDebug "myCooc3" $ M.size myCooc3
-  putStrLn $ show myCooc3
+  -- putStrLn $ show myCooc3

  -- Cooc -> Matrix
  let (ti, _) = createIndices myCooc3
@@ -115,12 +111,12 @@ textFlow' termType contexts = do
  printDebug "myCooc4" $ M.size myCooc4

  let matCooc = map2mat (0) (M.size ti) myCooc4
-  --printDebug "matCooc" matCooc
+  -- printDebug "matCooc" matCooc
  -- Matrix -> Clustering
  let distanceMat = conditional matCooc
 --  let distanceMat = distributional matCooc
  printDebug "distanceMat" $ A.arrayShape distanceMat
-  --printDebug "distanceMat" distanceMat
+  -- printDebug "distanceMat" distanceMat
 --
  let distanceMap = mat2map distanceMat
  printDebug "distanceMap" $ M.size distanceMap
@@ -135,7 +131,8 @@ textFlow' termType contexts = do
  pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions

 -----------------------------------------------------------
-- distance should not be a map since we just "toList" it (same as cLouvain)
+-- | From data to Graph
+-- FIXME: distance should not be a map since we just "toList" it (same as cLouvain)
 data2graph :: [(Label, Int)] -> Map (Int, Int) Int
                             -> Map (Int, Int) Double
                             -> [LouvainNode]