Context.hs 1.88 KB
Newer Older
Alexandre Delanoë's avatar
Alexandre Delanoë committed
1
{-|
2
Module      : Gargantext.Core.Text.Context
3
Description : How to manage contexts of texts ?
Alexandre Delanoë's avatar
Alexandre Delanoë committed
4 5 6 7 8 9
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

10 11 12 13 14 15 16 17 18
Context of text management tool; here is the logic of the main types:

- Term
- Multi-term
- Label
- Sentence
- Corpus

How to split contexts is described in this module.
Alexandre Delanoë's avatar
Alexandre Delanoë committed
19 20 21 22

-}


23
module Gargantext.Core.Text.Context
24
  where
Alexandre Delanoë's avatar
Alexandre Delanoë committed
25

26
import Data.Text (Text, pack, unpack)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
27 28
import Data.String (IsString)

29
import Text.HTML.TagSoup (parseTags, isTagText, Tag(..))
30
import Gargantext.Core.Text
Alexandre Delanoë's avatar
Alexandre Delanoë committed
31 32
import Gargantext.Prelude hiding (length)

33 34
------------------------------------------------------------------------
-- | A single term, represented as a 'Text'.
type Term = Text
Nicolas Pouillard's avatar
Nicolas Pouillard committed
35 36
-- | A multi-term: a list of 'Term's.
type MultiTerm = [Term]
-- | A label; it has the same representation as a 'MultiTerm'.
type Label = MultiTerm
37

Nicolas Pouillard's avatar
Nicolas Pouillard committed
38
-- | An association list pairing each 'Label' with a list of 'MultiTerm's.
-- NOTE(review): presumably the multi-terms grouped under that label; confirm
-- against the callers.
type TermList = [(Label, [MultiTerm])]
39

40 41 42 43 44 45 46
-- | A sentence: a list of elements of type @a@.
type Sentence  a = [a] -- or a nominal group
-- | A corpus: a list of 'Sentence's over the same element type.
type Corpus    a = [Sentence a] -- a list of sentences

-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------

47
-- | Contexts definition to build/unbuild contexts: the granularity at which
-- 'splitBy' cuts a text.
data SplitContext
  = Chars Int
    -- ^ Character-level contexts; 'splitBy' windows over @n + 1@ characters.
  | Sentences Int
    -- ^ Sentence-level contexts; 'splitBy' windows over @n + 1@ sentences.
  | Paragraphs Int
    -- ^ Paragraph-level contexts; 'splitBy' currently ignores the 'Int'.

-- | Split a 'Text' into contexts, at the granularity selected by the
-- 'SplitContext' argument.
--
-- * @'Chars' n@: overlapping windows of @n + 1@ characters, advancing one
--   character at a time (see the examples below).
-- * @'Sentences' n@: the same @chunkAlong (n + 1) 1@ windowing, applied to the
--   output of 'sentences'; each window is re-joined with 'unsentences'.
-- * @'Paragraphs' _@: the 'Int' is ignored; the input is run through
--   'parseTags' and the content of every text tag is returned.
--
-- To see some examples at a higher level (sentences and paragraphs), see
-- 'Gargantext.Core.Text.Examples.ex_terms'.
--
-- >>> splitBy (Chars 0) (pack "abcde")
-- ["a","b","c","d","e"]
--
-- >>> splitBy (Chars 1) (pack "abcde")
-- ["ab","bc","cd","de"]
--
-- >>> splitBy (Chars 2) (pack "abcde")
-- ["abc","bcd","cde"]
splitBy :: SplitContext -> Text -> [Text]
splitBy context input = case context of
  Chars n      -> map pack (chunkAlong (n + 1) 1 (unpack input))
  Sentences n  -> map unsentences (chunkAlong (n + 1) 1 (sentences input))
  Paragraphs _ -> map tagContent (filter isTagText (parseTags input))
  where
    -- Extract the payload of a text tag. The fallback is not reached from
    -- 'splitBy' (the list is filtered with 'isTagText' first); it only keeps
    -- the helper total.
    tagContent :: IsString s => Tag s -> s
    tagContent (TagText body) = body
    tagContent _              = ""
Alexandre Delanoë's avatar
Alexandre Delanoë committed
70