Commit f4e687d5 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Structure] Ngrams -> Text.

parent 50d95f87
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
...@@ -33,21 +33,22 @@ library: ...@@ -33,21 +33,22 @@ library:
- Gargantext.Database.NodeNodeNgram - Gargantext.Database.NodeNodeNgram
- Gargantext.Database.Utils - Gargantext.Database.Utils
- Gargantext.Database.User - Gargantext.Database.User
- Gargantext.Ngrams - Gargantext.Text
- Gargantext.Ngrams.Analysis - Gargantext.Text.Analysis
- Gargantext.Ngrams.TFICF - Gargantext.Text.TFICF
- Gargantext.Ngrams.Letters - Gargantext.Text.Letters
- Gargantext.Ngrams.CoreNLP - Gargantext.Text.CoreNLP
- Gargantext.Ngrams.Parser - Gargantext.Text.Parser
- Gargantext.Ngrams.Lang.En - Gargantext.Text.Token.Text
- Gargantext.Ngrams.Stem.En - Gargantext.Text.Lang.En
- Gargantext.Ngrams.Lang.Fr - Gargantext.Text.Stem.En
- Gargantext.Ngrams.Metrics - Gargantext.Text.Lang.Fr
- Gargantext.Ngrams.TextMining - Gargantext.Text.Metrics
- Gargantext.Ngrams.Occurrences - Gargantext.Text.TextMining
- Gargantext.Parsers - Gargantext.Text.Occurrences
- Gargantext.Parsers.WOS - Gargantext.Text.Parsers
- Gargantext.Parsers.Date - Gargantext.Text.Parsers.WOS
- Gargantext.Text.Parsers.Date
- Gargantext.Prelude - Gargantext.Prelude
- Gargantext.API - Gargantext.API
- Gargantext.API.Auth - Gargantext.API.Auth
......
...@@ -24,7 +24,7 @@ import Test.Hspec ...@@ -24,7 +24,7 @@ import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Main (Language(..)) import Gargantext.Types.Main (Language(..))
import Gargantext.Ngrams.Parser (extractNgrams, selectNgrams) import Gargantext.Text.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
......
...@@ -21,7 +21,7 @@ import Test.Hspec ...@@ -21,7 +21,7 @@ import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Main (Language(..)) import Gargantext.Types.Main (Language(..))
import Gargantext.Ngrams.Parser (extractNgrams, selectNgrams) import Gargantext.Text.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do ngramsExtractionTest = hspec $ do
......
...@@ -22,7 +22,7 @@ import Test.Hspec ...@@ -22,7 +22,7 @@ import Test.Hspec
import Data.Either (Either(Right)) import Data.Either (Either(Right))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Ngrams.Occurrences (parseOccurrences) import Gargantext.Text.Occurrences (parseOccurrences)
parsersTest :: IO () parsersTest :: IO ()
parsersTest = hspec $ do parsersTest = hspec $ do
......
...@@ -27,7 +27,7 @@ import Test.Hspec ...@@ -27,7 +27,7 @@ import Test.Hspec
import Test.QuickCheck import Test.QuickCheck
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Ngrams.Metrics import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0) #if !MIN_VERSION_base(4,8,0)
import Control.Applicative import Control.Applicative
......
module Data.Gargantext.Network where
import Data.Gargantext.Prelude
import Data.Map as DM
import Data.Vector as DV
type Measure a b c = DM.Map a (DM.Map b c)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type GargVector a b c = DM.Map a ( DM.Map b c)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargMatrix a b c d = DM.Map a (FolioVector b c d)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargTensor a b c d e = DM.Map a (FolioMatrix b c d e)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure :: Granularity -> Paire -> [Candle]
-> Measure Granularity Paire Candle
toMeasure g c1 c2 cs = DM.fromList [(g,
{-| {-|
Module : Gargantext.Ngrams Module : Gargantext.Text
Description : Ngrams tools Description : Ngrams tools
Copyright : (c) CNRS, 2018 Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -16,30 +16,29 @@ n non negative integer ...@@ -16,30 +16,29 @@ n non negative integer
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters module Gargantext.Text ( module Gargantext.Text.Letters
--, module Gargantext.Ngrams.Hetero --, module Gargantext.Text.Hetero
, module Gargantext.Ngrams.CoreNLP , module Gargantext.Text.CoreNLP
, module Gargantext.Ngrams.Parser , module Gargantext.Text.Parser
, module Gargantext.Ngrams.Occurrences , module Gargantext.Text.Occurrences
, module Gargantext.Ngrams.TextMining , module Gargantext.Text.TextMining
, module Gargantext.Ngrams.Metrics , module Gargantext.Text.Metrics
, Ngrams(..), ngrams, occ, sumOcc, text2fis, clean , Ngrams(..), ngrams, occ, sumOcc, text2fis, clean
, ListName(..), equivNgrams, isGram, sentences , ListName(..), equivNgrams, isGram, sentences
, ngramsTest , ngramsTest
--, module Gargantext.Ngrams.Words
) where ) where
import Gargantext.Ngrams.Letters import Gargantext.Text.Letters
--import Gargantext.Ngrams.Hetero --import Gargantext.Text.Hetero
import Gargantext.Ngrams.CoreNLP import Gargantext.Text.CoreNLP
import Gargantext.Ngrams.Parser import Gargantext.Text.Parser
import Gargantext.Ngrams.Occurrences import Gargantext.Text.Occurrences
import Gargantext.Ngrams.TextMining import Gargantext.Text.TextMining
--import Gargantext.Ngrams.Words --import Gargantext.Text.Words
import Gargantext.Ngrams.Metrics import Gargantext.Text.Metrics
import qualified Gargantext.Ngrams.FrequentItemSet as FIS import qualified Gargantext.Text.FrequentItemSet as FIS
----------------------------------------------------------------- -----------------------------------------------------------------
import Data.List (sort) import Data.List (sort)
...@@ -152,10 +151,9 @@ isStop c = c `elem` ['.','?','!'] ...@@ -152,10 +151,9 @@ isStop c = c `elem` ['.','?','!']
-- | Tests -- | Tests
-- TODO http://hackage.haskell.org/package/tokenize-0.3.0/docs/NLP-Tokenize-Text.html ngramsTest fp = ws
ngramsTest = ws
where where
txt = concat <$> lines <$> clean <$> readFile "Giono-arbres.txt" txt = concat <$> lines <$> clean <$> readFile fp
-- | Number of sentences -- | Number of sentences
ls = sentences <$> txt ls = sentences <$> txt
-- | Number of monograms used in the full text -- | Number of monograms used in the full text
...@@ -165,6 +163,6 @@ ngramsTest = ws ...@@ -165,6 +163,6 @@ ngramsTest = ws
-- group ngrams -- group ngrams
ocs = occ <$> ws ocs = occ <$> ws
--
...@@ -11,7 +11,7 @@ Portability : POSIX ...@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Analysis module Gargantext.Text.Analysis
where where
import Gargantext.Prelude (undefined, IO(), Int()) import Gargantext.Prelude (undefined, IO(), Int())
......
{-| {-|
Module : Gargantext.Ngrams.CoreNLP Module : Gargantext.Text.CoreNLP
Description : CoreNLP module Description : CoreNLP module
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -16,7 +16,7 @@ Portability : POSIX ...@@ -16,7 +16,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.Ngrams.CoreNLP where module Gargantext.Text.CoreNLP where
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import GHC.Generics import GHC.Generics
......
{-| {-|
Module : Gargantext.Ngrams.FrequentItemSet Module : Gargantext.Text.FrequentItemSet
Description : Ngrams tools Description : Ngrams tools
Copyright : (c) CNRS, 2018 Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS) ...@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.FrequentItemSet module Gargantext.Text.FrequentItemSet
( Fis, Size ( Fis, Size
, occ, cooc , occ, cooc
, all, between , all, between
......
module Gargantext.Ngrams.Hetero where module Gargantext.Text.Hetero where
import GHC.Real as R import GHC.Real as R
import Data.Set as S import Data.Set as S
...@@ -12,8 +12,8 @@ import Gargantext.Database.Gargandb ...@@ -12,8 +12,8 @@ import Gargantext.Database.Gargandb
import Gargantext.Database.Private import Gargantext.Database.Private
--import Gargantext.Utils.Chronos --import Gargantext.Utils.Chronos
import Gargantext.Ngrams.Words (cleanText) import Gargantext.Text.Words (cleanText)
import Gargantext.Ngrams.Count (occurrences) import Gargantext.Text.Count (occurrences)
import Gargantext.Database.Simple import Gargantext.Database.Simple
......
{-| {-|
Module : Gargantext.Ngrams.Lang.En Module : Gargantext.Text.Lang.En
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Lang.En (selectNgrams, groupNgrams, textTest) where module Gargantext.Text.Lang.En (selectNgrams, groupNgrams, textTest) where
import Gargantext.Prelude import Gargantext.Prelude
import Data.Text (Text) import Data.Text (Text)
......
{-| {-|
Module : Gargantext.Ngrams.Lang.Fr Module : Gargantext.Text.Lang.Fr
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams, textTest) module Gargantext.Text.Lang.Fr (selectNgrams, groupNgrams, textTest)
where where
import Gargantext.Prelude import Gargantext.Prelude
......
{-| {-|
Module : Gargantext.Ngrams.Letters Module : Gargantext.Text.Letters
Description : Ngrams.Letters module Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ Sugar to work on letters with Text. ...@@ -14,7 +14,7 @@ Sugar to work on letters with Text.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Letters where module Gargantext.Text.Letters where
import qualified Data.Text.Lazy as DTL import qualified Data.Text.Lazy as DTL
-- import qualified Data.Text.Lazy.IO as DTLIO -- import qualified Data.Text.Lazy.IO as DTLIO
......
{-| {-|
Module : Gargantext.Ngrams.List Module : Gargantext.Text.List
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,11 +13,11 @@ commentary with @some markup@. ...@@ -13,11 +13,11 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.List where module Gargantext.Text.List where
import Data.Maybe import Data.Maybe
import Data.List (filter) import Data.List (filter)
import Gargantext.Ngrams import Gargantext.Text
import Gargantext.Prelude import Gargantext.Prelude
graph :: [Ngrams] -> [Ngrams] graph :: [Ngrams] -> [Ngrams]
......
{-| {-|
Module : Gargantext.Ngrams.Metrics Module : Gargantext.Text.Metrics
Description : Short description Description : Short description
Copyright : (c) Some Guy, 2013 Copyright : (c) Some Guy, 2013
Someone Else, 2014 Someone Else, 2014
...@@ -13,7 +13,7 @@ Mainly reexport functions in @Data.Text.Metrics@ ...@@ -13,7 +13,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Metrics (levenshtein module Gargantext.Text.Metrics (levenshtein
, levenshteinNorm , levenshteinNorm
, damerauLevenshtein , damerauLevenshtein
, damerauLevenshteinNorm , damerauLevenshteinNorm
......
{-| {-|
Module : Gargantext.Ngrams.Occurrences Module : Gargantext.Text.Occurrences
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Occurrences where module Gargantext.Text.Occurrences where
import Gargantext.Prelude import Gargantext.Prelude
......
{-| {-|
Module : Gargantext.Ngrams.Parser Module : Gargantext.Text.Parser
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,15 +15,15 @@ commentary with @some markup@. ...@@ -15,15 +15,15 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Ngrams.Parser where module Gargantext.Text.Parser where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Ngrams.CoreNLP import Gargantext.Text.CoreNLP
import Data.Text hiding (map) import Data.Text hiding (map)
import Gargantext.Types.Main (Language(..)) import Gargantext.Types.Main (Language(..))
import qualified Gargantext.Ngrams.Lang.En as En import qualified Gargantext.Text.Lang.En as En
import qualified Gargantext.Ngrams.Lang.Fr as Fr import qualified Gargantext.Text.Lang.Fr as Fr
type SNgrams = (Text, Text, Text) type SNgrams = (Text, Text, Text)
......
...@@ -20,7 +20,7 @@ please follow the types. ...@@ -20,7 +20,7 @@ please follow the types.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Parsers -- (parse, FileFormat(..)) module Gargantext.Text.Parsers -- (parse, FileFormat(..))
where where
import Gargantext.Prelude import Gargantext.Prelude
...@@ -46,7 +46,7 @@ import Path.IO (resolveFile') ...@@ -46,7 +46,7 @@ import Path.IO (resolveFile')
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.String (String()) import Data.String (String())
import Gargantext.Parsers.WOS (wosParser) import Gargantext.Text.Parsers.WOS (wosParser)
---- import Gargantext.Parsers.XML (xmlParser) ---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser) ---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser) ---- import Gargantext.Parsers.ODT (odtParser)
......
{-| {-|
Module : Gargantext.Parsers.Date Module : Gargantext.Text.Parsers.Date
Description : Some utils to parse dates Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -18,7 +18,7 @@ DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" ...@@ -18,7 +18,7 @@ DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where module Gargantext.Text.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
import Gargantext.Prelude import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++)) import Prelude (toInteger, div, otherwise, (++))
......
{-| {-|
Module : Gargantext.Parsers.WOS Module : Gargantext.Text.Parsers.WOS
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Parsers.WOS (wosParser) where module Gargantext.Text.Parsers.WOS (wosParser) where
-- TOFIX : Should import Gargantext.Prelude here -- TOFIX : Should import Gargantext.Prelude here
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat) import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
......
...@@ -17,7 +17,7 @@ Adapted from: ...@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Stem.En module Gargantext.Text.Stem.En
where where
import Control.Monad import Control.Monad
......
{-| {-|
Module : Gargantext.Ngrams.TFICF Module : Gargantext.Text.TFICF
Description : TFICF Ngrams tools Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,7 +15,7 @@ Definition of TFICF ...@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.TFICF where module Gargantext.Text.TFICF where
import GHC.Generics (Generic) import GHC.Generics (Generic)
......
{-| {-|
Module : Gargantext.Ngrams.TextMining Module : Gargantext.Text.TextMining
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ commentary with @some markup@. ...@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.TextMining where module Gargantext.Text.TextMining where
import Gargantext.Prelude import Gargantext.Prelude
import Data.Ord(Ordering(LT,GT), compare) import Data.Ord(Ordering(LT,GT), compare)
......
{-| {-|
Module : Gargantext.Ngrams.Token.Text Module : Gargantext.Text.Token.Text
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview ...@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Token.Text module Gargantext.Text.Token.Text
( EitherList(..) ( EitherList(..)
, Tokenizer , Tokenizer
, tokenize , tokenize
...@@ -32,8 +32,6 @@ where ...@@ -32,8 +32,6 @@ where
import qualified Data.Char as Char import qualified Data.Char as Char
import Data.Maybe import Data.Maybe
import Control.Monad.Instances ()
import Control.Applicative
import Control.Monad import Control.Monad
import Data.Text (Text) import Data.Text (Text)
...@@ -49,6 +47,18 @@ import qualified Data.Text as T ...@@ -49,6 +47,18 @@ import qualified Data.Text as T
--- ---
-- > myTokenizer :: Tokenizer -- > myTokenizer :: Tokenizer
-- > myTokenizer = whitespace >=> allPunctuation -- > myTokenizer = whitespace >=> allPunctuation
-- examples :: [Text]
-- examples =
-- ["This shouldn't happen."
-- ,"Some 'quoted' stuff"
-- ,"This is a URL: http://example.org."
-- ,"How about an email@example.com"
-- ,"ReferenceError #1065 broke my debugger!"
-- ,"I would've gone."
-- ,"They've been there."
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
--- ---
type Tokenizer = Text -> EitherList Text Text type Tokenizer = Text -> EitherList Text Text
...@@ -81,8 +91,8 @@ uris x | isUri x = E [Left x] ...@@ -81,8 +91,8 @@ uris x | isUri x = E [Left x]
punctuation :: Tokenizer punctuation :: Tokenizer
punctuation = finalPunctuation >=> initialPunctuation punctuation = finalPunctuation >=> initialPunctuation
hyphens :: Tokenizer --hyphens :: Tokenizer
hyphens xs = E [Right w | w <- T.split (=='-') xs ] --hyphens xs = E [Right w | w <- T.split (=='-') xs ]
-- | Split off word-final punctuation -- | Split off word-final punctuation
finalPunctuation :: Tokenizer finalPunctuation :: Tokenizer
...@@ -152,16 +162,3 @@ unwrap :: Either a a -> a ...@@ -152,16 +162,3 @@ unwrap :: Either a a -> a
unwrap (Left x) = x unwrap (Left x) = x
unwrap (Right x) = x unwrap (Right x) = x
examples :: [Text]
examples =
["This shouldn't happen."
,"Some 'quoted' stuff"
,"This is a URL: http://example.org."
,"How about an email@example.com"
,"ReferenceError #1065 broke my debugger!"
,"I would've gone."
,"They've been there."
,"Hyphen-words"
,"Yes/No questions"
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment