Commit f4e687d5 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Structure] Ngrams -> Text.

parent 50d95f87
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
......@@ -33,21 +33,22 @@ library:
- Gargantext.Database.NodeNodeNgram
- Gargantext.Database.Utils
- Gargantext.Database.User
- Gargantext.Ngrams
- Gargantext.Ngrams.Analysis
- Gargantext.Ngrams.TFICF
- Gargantext.Ngrams.Letters
- Gargantext.Ngrams.CoreNLP
- Gargantext.Ngrams.Parser
- Gargantext.Ngrams.Lang.En
- Gargantext.Ngrams.Stem.En
- Gargantext.Ngrams.Lang.Fr
- Gargantext.Ngrams.Metrics
- Gargantext.Ngrams.TextMining
- Gargantext.Ngrams.Occurrences
- Gargantext.Parsers
- Gargantext.Parsers.WOS
- Gargantext.Parsers.Date
- Gargantext.Text
- Gargantext.Text.Analysis
- Gargantext.Text.TFICF
- Gargantext.Text.Letters
- Gargantext.Text.CoreNLP
- Gargantext.Text.Parser
- Gargantext.Text.Token.Text
- Gargantext.Text.Lang.En
- Gargantext.Text.Stem.En
- Gargantext.Text.Lang.Fr
- Gargantext.Text.Metrics
- Gargantext.Text.TextMining
- Gargantext.Text.Occurrences
- Gargantext.Text.Parsers
- Gargantext.Text.Parsers.WOS
- Gargantext.Text.Parsers.Date
- Gargantext.Prelude
- Gargantext.API
- Gargantext.API.Auth
......
......@@ -24,7 +24,7 @@ import Test.Hspec
import Gargantext.Prelude
import Gargantext.Types.Main (Language(..))
import Gargantext.Ngrams.Parser (extractNgrams, selectNgrams)
import Gargantext.Text.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO ()
......
......@@ -21,7 +21,7 @@ import Test.Hspec
import Gargantext.Prelude
import Gargantext.Types.Main (Language(..))
import Gargantext.Ngrams.Parser (extractNgrams, selectNgrams)
import Gargantext.Text.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do
......
......@@ -22,7 +22,7 @@ import Test.Hspec
import Data.Either (Either(Right))
import Gargantext.Prelude
import Gargantext.Ngrams.Occurrences (parseOccurrences)
import Gargantext.Text.Occurrences (parseOccurrences)
parsersTest :: IO ()
parsersTest = hspec $ do
......
......@@ -27,7 +27,7 @@ import Test.Hspec
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.Ngrams.Metrics
import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
......
module Data.Gargantext.Network where
import Data.Gargantext.Prelude
import Data.Map as DM
import Data.Vector as DV
type Measure a b c = DM.Map a (DM.Map b c)
-- UTCTime Paire Granularity [Candle]
-- GargVector Paire Granularity [Candle]
type GargVector a b c = DM.Map a ( DM.Map b c)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargMatrix a b c d = DM.Map a (FolioVector b c d)
-- GargMatrix Granularity (Paire Paire) [Candle]
type GargTensor a b c d e = DM.Map a (FolioMatrix b c d e)
--data PortGarg = PortGarg { _portFolioParameters :: Parameters
-- , _portGargData :: Garg
--}
toMeasure :: Granularity -> Paire -> [Candle]
-> Measure Granularity Paire Candle
toMeasure g c1 c2 cs = DM.fromList [(g,
{-|
Module : Gargantext.Ngrams
Module : Gargantext.Text
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
......@@ -16,30 +16,29 @@ n non negative integer
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
--, module Gargantext.Ngrams.Hetero
, module Gargantext.Ngrams.CoreNLP
, module Gargantext.Ngrams.Parser
, module Gargantext.Ngrams.Occurrences
, module Gargantext.Ngrams.TextMining
, module Gargantext.Ngrams.Metrics
module Gargantext.Text ( module Gargantext.Text.Letters
--, module Gargantext.Text.Hetero
, module Gargantext.Text.CoreNLP
, module Gargantext.Text.Parser
, module Gargantext.Text.Occurrences
, module Gargantext.Text.TextMining
, module Gargantext.Text.Metrics
, Ngrams(..), ngrams, occ, sumOcc, text2fis, clean
, ListName(..), equivNgrams, isGram, sentences
, ngramsTest
--, module Gargantext.Ngrams.Words
) where
import Gargantext.Ngrams.Letters
--import Gargantext.Ngrams.Hetero
import Gargantext.Ngrams.CoreNLP
import Gargantext.Ngrams.Parser
import Gargantext.Text.Letters
--import Gargantext.Text.Hetero
import Gargantext.Text.CoreNLP
import Gargantext.Text.Parser
import Gargantext.Ngrams.Occurrences
import Gargantext.Ngrams.TextMining
--import Gargantext.Ngrams.Words
import Gargantext.Text.Occurrences
import Gargantext.Text.TextMining
--import Gargantext.Text.Words
import Gargantext.Ngrams.Metrics
import qualified Gargantext.Ngrams.FrequentItemSet as FIS
import Gargantext.Text.Metrics
import qualified Gargantext.Text.FrequentItemSet as FIS
-----------------------------------------------------------------
import Data.List (sort)
......@@ -152,10 +151,9 @@ isStop c = c `elem` ['.','?','!']
-- | Tests
-- TODO http://hackage.haskell.org/package/tokenize-0.3.0/docs/NLP-Tokenize-Text.html
ngramsTest = ws
ngramsTest fp = ws
where
txt = concat <$> lines <$> clean <$> readFile "Giono-arbres.txt"
txt = concat <$> lines <$> clean <$> readFile fp
-- | Number of sentences
ls = sentences <$> txt
-- | Number of monograms used in the full text
......@@ -165,6 +163,6 @@ ngramsTest = ws
-- group ngrams
ocs = occ <$> ws
--
......@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Analysis
module Gargantext.Text.Analysis
where
import Gargantext.Prelude (undefined, IO(), Int())
......
{-|
Module : Gargantext.Ngrams.CoreNLP
Module : Gargantext.Text.CoreNLP
Description : CoreNLP module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......@@ -16,7 +16,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Ngrams.CoreNLP where
module Gargantext.Text.CoreNLP where
import Data.Aeson.TH (deriveJSON)
import GHC.Generics
......
{-|
Module : Gargantext.Ngrams.FrequentItemSet
Module : Gargantext.Text.FrequentItemSet
Description : Ngrams tools
Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3
......@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.FrequentItemSet
module Gargantext.Text.FrequentItemSet
( Fis, Size
, occ, cooc
, all, between
......
module Gargantext.Ngrams.Hetero where
module Gargantext.Text.Hetero where
import GHC.Real as R
import Data.Set as S
......@@ -12,8 +12,8 @@ import Gargantext.Database.Gargandb
import Gargantext.Database.Private
--import Gargantext.Utils.Chronos
import Gargantext.Ngrams.Words (cleanText)
import Gargantext.Ngrams.Count (occurrences)
import Gargantext.Text.Words (cleanText)
import Gargantext.Text.Count (occurrences)
import Gargantext.Database.Simple
......
{-|
Module : Gargantext.Ngrams.Lang.En
Module : Gargantext.Text.Lang.En
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Lang.En (selectNgrams, groupNgrams, textTest) where
module Gargantext.Text.Lang.En (selectNgrams, groupNgrams, textTest) where
import Gargantext.Prelude
import Data.Text (Text)
......
{-|
Module : Gargantext.Ngrams.Lang.Fr
Module : Gargantext.Text.Lang.Fr
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Lang.Fr (selectNgrams, groupNgrams, textTest)
module Gargantext.Text.Lang.Fr (selectNgrams, groupNgrams, textTest)
where
import Gargantext.Prelude
......
{-|
Module : Gargantext.Ngrams.Letters
Module : Gargantext.Text.Letters
Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ Sugar to work on letters with Text.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Letters where
module Gargantext.Text.Letters where
import qualified Data.Text.Lazy as DTL
-- import qualified Data.Text.Lazy.IO as DTLIO
......
{-|
Module : Gargantext.Ngrams.List
Module : Gargantext.Text.List
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -13,11 +13,11 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.List where
module Gargantext.Text.List where
import Data.Maybe
import Data.List (filter)
import Gargantext.Ngrams
import Gargantext.Text
import Gargantext.Prelude
graph :: [Ngrams] -> [Ngrams]
......
{-|
Module : Gargantext.Ngrams.Metrics
Module : Gargantext.Text.Metrics
Description : Short description
Copyright : (c) Some Guy, 2013
Someone Else, 2014
......@@ -13,7 +13,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Metrics (levenshtein
module Gargantext.Text.Metrics (levenshtein
, levenshteinNorm
, damerauLevenshtein
, damerauLevenshteinNorm
......
{-|
Module : Gargantext.Ngrams.Occurrences
Module : Gargantext.Text.Occurrences
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Occurrences where
module Gargantext.Text.Occurrences where
import Gargantext.Prelude
......
{-|
Module : Gargantext.Ngrams.Parser
Module : Gargantext.Text.Parser
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -15,15 +15,15 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Ngrams.Parser where
module Gargantext.Text.Parser where
import Gargantext.Prelude
import Gargantext.Ngrams.CoreNLP
import Gargantext.Text.CoreNLP
import Data.Text hiding (map)
import Gargantext.Types.Main (Language(..))
import qualified Gargantext.Ngrams.Lang.En as En
import qualified Gargantext.Ngrams.Lang.Fr as Fr
import qualified Gargantext.Text.Lang.En as En
import qualified Gargantext.Text.Lang.Fr as Fr
type SNgrams = (Text, Text, Text)
......
......@@ -20,7 +20,7 @@ please follow the types.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Parsers -- (parse, FileFormat(..))
module Gargantext.Text.Parsers -- (parse, FileFormat(..))
where
import Gargantext.Prelude
......@@ -46,7 +46,7 @@ import Path.IO (resolveFile')
import Control.Concurrent.Async as CCA (mapConcurrently)
import Data.String (String())
import Gargantext.Parsers.WOS (wosParser)
import Gargantext.Text.Parsers.WOS (wosParser)
---- import Gargantext.Parsers.XML (xmlParser)
---- import Gargantext.Parsers.DOC (docParser)
---- import Gargantext.Parsers.ODT (odtParser)
......
{-|
Module : Gargantext.Parsers.Date
Module : Gargantext.Text.Parsers.Date
Description : Some utils to parse dates
Copyright : (c) CNRS 2017-present
License : AGPL + CECILL v3
......@@ -18,7 +18,7 @@ DGP.parseDate1 DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
module Gargantext.Text.Parsers.Date (parseDate1, Lang(FR, EN), parseDate, fromRFC3339, parseTimeOfDay, getMultiplicator) where
import Gargantext.Prelude
import Prelude (toInteger, div, otherwise, (++))
......
{-|
Module : Gargantext.Parsers.WOS
Module : Gargantext.Text.Parsers.WOS
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Parsers.WOS (wosParser) where
module Gargantext.Text.Parsers.WOS (wosParser) where
-- TOFIX : Should import Gargantext.Prelude here
import Prelude hiding (takeWhile, take, concat, readFile, lines, concat)
......
......@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.Stem.En
module Gargantext.Text.Stem.En
where
import Control.Monad
......
{-|
Module : Gargantext.Ngrams.TFICF
Module : Gargantext.Text.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
......@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.TFICF where
module Gargantext.Text.TFICF where
import GHC.Generics (Generic)
......
{-|
Module : Gargantext.Ngrams.TextMining
Module : Gargantext.Text.TextMining
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Ngrams.TextMining where
module Gargantext.Text.TextMining where
import Gargantext.Prelude
import Data.Ord(Ordering(LT,GT), compare)
......
{-|
Module : Gargantext.Ngrams.Token.Text
Module : Gargantext.Text.Token.Text
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Ngrams.Token.Text
module Gargantext.Text.Token.Text
( EitherList(..)
, Tokenizer
, tokenize
......@@ -32,8 +32,6 @@ where
import qualified Data.Char as Char
import Data.Maybe
import Control.Monad.Instances ()
import Control.Applicative
import Control.Monad
import Data.Text (Text)
......@@ -49,6 +47,18 @@ import qualified Data.Text as T
---
-- > myTokenizer :: Tokenizer
-- > myTokenizer = whitespace >=> allPunctuation
-- examples :: [Text]
-- examples =
-- ["This shouldn't happen."
-- ,"Some 'quoted' stuff"
-- ,"This is a URL: http://example.org."
-- ,"How about an email@example.com"
-- ,"ReferenceError #1065 broke my debugger!"
-- ,"I would've gone."
-- ,"They've been there."
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
---
type Tokenizer = Text -> EitherList Text Text
......@@ -81,8 +91,8 @@ uris x | isUri x = E [Left x]
punctuation :: Tokenizer
punctuation = finalPunctuation >=> initialPunctuation
hyphens :: Tokenizer
hyphens xs = E [Right w | w <- T.split (=='-') xs ]
--hyphens :: Tokenizer
--hyphens xs = E [Right w | w <- T.split (=='-') xs ]
-- | Split off word-final punctuation
finalPunctuation :: Tokenizer
......@@ -152,16 +162,3 @@ unwrap :: Either a a -> a
unwrap (Left x) = x
unwrap (Right x) = x
examples :: [Text]
examples =
["This shouldn't happen."
,"Some 'quoted' stuff"
,"This is a URL: http://example.org."
,"How about an email@example.com"
,"ReferenceError #1065 broke my debugger!"
,"I would've gone."
,"They've been there."
,"Hyphen-words"
,"Yes/No questions"
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment