Commit 697d44ea authored by Alexandre Delanoë's avatar Alexandre Delanoë

[STRUCTURE] Make it simple and clean old code.

parent f4e687d5
...@@ -22,41 +22,20 @@ library: ...@@ -22,41 +22,20 @@ library:
- -Werror - -Werror
exposed-modules: exposed-modules:
- Gargantext - Gargantext
- Gargantext.Database - Gargantext.Prelude
- Gargantext.Database.Instances - Gargantext.Core
- Gargantext.Database.Ngram - Gargantext.Core.Utils
- Gargantext.Database.Node - Gargantext.Core.Types
- Gargantext.Database.Facet - Gargantext.Core.Types.Node
- Gargantext.Database.NodeNgram
- Gargantext.Database.NodeNgramNgram
- Gargantext.Database.NodeNode
- Gargantext.Database.NodeNodeNgram
- Gargantext.Database.Utils
- Gargantext.Database.User
- Gargantext.Text - Gargantext.Text
- Gargantext.Text.Analysis
- Gargantext.Text.TFICF
- Gargantext.Text.Letters
- Gargantext.Text.CoreNLP
- Gargantext.Text.Parser
- Gargantext.Text.Token.Text
- Gargantext.Text.Lang.En
- Gargantext.Text.Stem.En
- Gargantext.Text.Lang.Fr
- Gargantext.Text.Metrics - Gargantext.Text.Metrics
- Gargantext.Text.TextMining - Gargantext.Text.Metrics.Occurrences
- Gargantext.Text.Occurrences - Gargantext.Text.Metrics.FrequentItemSet
- Gargantext.Text.Parsers - Gargantext.Text.Ngrams.PosTagging.CoreNLP
- Gargantext.Text.Parsers.WOS - Gargantext.Text.Ngrams.PosTagging.Parser
- Gargantext.Text.Parsers.Date - Gargantext.Text.Parsers.Date
- Gargantext.Prelude - Gargantext.Database
- Gargantext.API - Gargantext.API
- Gargantext.API.Auth
- Gargantext.Types
- Gargantext.Types.Main
- Gargantext.Types.Node
- Gargantext.Utils.DateUtils
- Gargantext.Utils.Prefix
dependencies: dependencies:
- QuickCheck - QuickCheck
- aeson - aeson
......
...@@ -11,7 +11,7 @@ Portability : POSIX ...@@ -11,7 +11,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Main (Language(..)) import Gargantext.Core (Lang(..))
--import qualified Ngrams.Lang.Fr as Fr --import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang as Lang import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ import qualified Ngrams.Lang.Occurrences as Occ
......
...@@ -18,11 +18,11 @@ module Ngrams.Lang where ...@@ -18,11 +18,11 @@ module Ngrams.Lang where
import Gargantext.Prelude (IO()) import Gargantext.Prelude (IO())
import Gargantext.Types.Main (Language(..)) import Gargantext.Core (Lang(..))
import qualified Ngrams.Lang.Fr as Fr import qualified Ngrams.Lang.Fr as Fr
import qualified Ngrams.Lang.En as En import qualified Ngrams.Lang.En as En
ngramsExtractionTest :: Language -> IO () ngramsExtractionTest :: Lang -> IO ()
ngramsExtractionTest FR = Fr.ngramsExtractionTest ngramsExtractionTest FR = Fr.ngramsExtractionTest
ngramsExtractionTest EN = En.ngramsExtractionTest ngramsExtractionTest EN = En.ngramsExtractionTest
...@@ -23,8 +23,8 @@ import Data.Text (Text) ...@@ -23,8 +23,8 @@ import Data.Text (Text)
import Test.Hspec import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Main (Language(..)) import Gargantext.Core (Lang(..))
import Gargantext.Text.Parser (extractNgrams, selectNgrams) import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
......
...@@ -20,8 +20,8 @@ module Ngrams.Lang.Fr where ...@@ -20,8 +20,8 @@ module Ngrams.Lang.Fr where
import Test.Hspec import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Main (Language(..)) import Gargantext.Core (Lang(..))
import Gargantext.Text.Parser (extractNgrams, selectNgrams) import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
ngramsExtractionTest :: IO () ngramsExtractionTest :: IO ()
ngramsExtractionTest = hspec $ do ngramsExtractionTest = hspec $ do
......
...@@ -22,7 +22,7 @@ import Test.Hspec ...@@ -22,7 +22,7 @@ import Test.Hspec
import Data.Either (Either(Right)) import Data.Either (Either(Right))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Occurrences (parseOccurrences) import Gargantext.Text.Metrics.Occurrences (parseOccurrences)
parsersTest :: IO () parsersTest :: IO ()
parsersTest = hspec $ do parsersTest = hspec $ do
......
...@@ -15,11 +15,9 @@ commentary with @some markup@. ...@@ -15,11 +15,9 @@ commentary with @some markup@.
module Parsers.Date where module Parsers.Date where
import Gargantext.Prelude
import Test.Hspec import Test.Hspec
import Test.QuickCheck import Test.QuickCheck
import Parsers.Types
import Control.Applicative ((<*>)) import Control.Applicative ((<*>))
import Data.Either (Either(..)) import Data.Either (Either(..))
...@@ -28,7 +26,12 @@ import Data.Text (pack, Text) ...@@ -28,7 +26,12 @@ import Data.Text (pack, Text)
import Text.Parsec.Error (ParseError) import Text.Parsec.Error (ParseError)
import Duckling.Time.Types (toRFC3339) import Duckling.Time.Types (toRFC3339)
import Gargantext.Parsers.Date (fromRFC3339)
-----------------------------------------------------------
import Gargantext.Prelude
import Gargantext.Text.Parsers.Date (fromRFC3339)
import Parsers.Types
-----------------------------------------------------------
fromRFC3339Inv :: Either ParseError ZonedTime -> Text fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z fromRFC3339Inv (Right z) = toRFC3339 z
......
...@@ -42,7 +42,7 @@ import Test.QuickCheck (elements) ...@@ -42,7 +42,7 @@ import Test.QuickCheck (elements)
-- import Control.Applicative ((<*>)) -- import Control.Applicative ((<*>))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
----------------------------------------------------------------------- -----------------------------------------------------------------------
type CountAPI = Post '[JSON] Counts type CountAPI = Post '[JSON] Counts
......
...@@ -40,7 +40,7 @@ import Servant ...@@ -40,7 +40,7 @@ import Servant
-- import Servant.Multipart -- import Servant.Multipart
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types.Node import Gargantext.Core.Types.Node
import Gargantext.Database.Node ( getNodesWithParentId import Gargantext.Database.Node ( getNodesWithParentId
, getNode, getNodesWith , getNode, getNodesWith
, deleteNode, deleteNodes) , deleteNode, deleteNodes)
......
{-|
Module : Gargantext.Core
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Core
where
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we suppose text has an homogenous language
data Lang = EN | FR
-- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == deutch (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
...@@ -13,9 +13,9 @@ commentary with @some markup@. ...@@ -13,9 +13,9 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Types ( module Gargantext.Types.Main module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Types.Node , module Gargantext.Core.Types.Node
) where ) where
import Gargantext.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Types.Node import Gargantext.Core.Types.Node
{-| {-|
Module : Gargantext.Types.Main Module : Gargantext.Core.Types.Main
Description : Short description Description : Short description
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -19,7 +19,7 @@ commentary with @some markup@. ...@@ -19,7 +19,7 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
module Gargantext.Types.Main where module Gargantext.Core.Types.Main where
------------------------------------------------------------------------ ------------------------------------------------------------------------
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
...@@ -28,22 +28,10 @@ import Data.Monoid ((<>)) ...@@ -28,22 +28,10 @@ import Data.Monoid ((<>))
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.List (lookup) import Data.List (lookup)
import Gargantext.Types.Node import Gargantext.Core.Types.Node
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we suppose text has an homogenous language
data Language = EN | FR
-- | DE | IT | SP
-- > EN == english
-- > FR == french
-- > DE == deutch (not implemented yet)
-- > IT == italian (not implemented yet)
-- > SP == spanish (not implemented yet)
-- > ... add your language and help us to implement it (:
-- All the Database is structred like a hierarchical Tree -- All the Database is structred like a hierarchical Tree
data Tree a = NodeT a [Tree a] data Tree a = NodeT a [Tree a]
deriving (Show, Read, Eq) deriving (Show, Read, Eq)
......
{-| {-|
Module : Gargantext.Types.Nodes Module : Gargantext.Core.Types.Nodes
Description : Main Types of Nodes Description : Main Types of Nodes
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -18,7 +18,7 @@ Portability : POSIX ...@@ -18,7 +18,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
-- {-# LANGUAGE DuplicateRecordFields #-} -- {-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Types.Node where module Gargantext.Core.Types.Node where
import Prelude (Enum, Bounded, minBound, maxBound) import Prelude (Enum, Bounded, minBound, maxBound)
...@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary ...@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -22,7 +22,7 @@ Phylomemy was first described in [REF]. ...@@ -22,7 +22,7 @@ Phylomemy was first described in [REF].
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Types.Phylo where module Gargantext.Core.Types.Phylo where
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Maybe (Maybe) import Data.Maybe (Maybe)
...@@ -32,7 +32,7 @@ import Data.Time.Clock.POSIX (POSIXTime) ...@@ -32,7 +32,7 @@ import Data.Time.Clock.POSIX (POSIXTime)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Phylo datatype descriptor of a phylomemy -- | Phylo datatype descriptor of a phylomemy
......
...@@ -13,9 +13,9 @@ commentary with @some markup@. ...@@ -13,9 +13,9 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils ( module Gargantext.Core.Utils (
-- module Gargantext.Utils.Chronos -- module Gargantext.Utils.Chronos
module Gargantext.Utils.Prefix module Gargantext.Core.Utils.Prefix
) where ) where
-- import Gargantext.Utils.Chronos -- import Gargantext.Utils.Chronos
import Gargantext.Utils.Prefix import Gargantext.Core.Utils.Prefix
{-| {-|
Module : Gargantext.Utils.Counts Module : Gargantext.Core.Utils.Count
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -9,6 +9,9 @@ Portability : POSIX ...@@ -9,6 +9,9 @@ Portability : POSIX
Here is a longer description of this module, containing some Here is a longer description of this module, containing some
commentary with @some markup@. commentary with @some markup@.
Inspired from Gabriel Gonzales, "beautiful folds" talk.
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
...@@ -16,15 +19,17 @@ commentary with @some markup@. ...@@ -16,15 +19,17 @@ commentary with @some markup@.
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
module Data.Gargantext.Utils.Count (head, last, all, any, sum, product, length) module Gargantext.Core.Utils.Count (head, last, all, any, sum, product, length)
where where
import Data.Monoid import Data.Functor
import Protolude hiding ((<>), head, last, all, any, sum, product, length) import Control.Applicative
import qualified Data.Foldable import qualified Data.Foldable
import Data.Monoid
import Control.Lens (Getting, foldMapOf) import Control.Lens (Getting, foldMapOf)
import Gargantext.Prelude hiding (head, sum, length)
data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o) data Fold i o = forall m . Monoid m => Fold (i -> m) (m -> o)
instance Functor (Fold i) where instance Functor (Fold i) where
......
{-| {-|
Module : Gargantext.Utils.DateUtils Module : Gargantext.Core.Utils.DateUtils
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -10,7 +10,7 @@ Portability : POSIX ...@@ -10,7 +10,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils.DateUtils where module Gargantext.Core.Utils.DateUtils where
import Gargantext.Prelude import Gargantext.Prelude
import Data.Time (UTCTime, toGregorian, utctDay) import Data.Time (UTCTime, toGregorian, utctDay)
......
{-| {-|
Module : Gargantext.Utils.Prefix Module : Gargantext.Core.Utils.Prefix
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ commentary with @some markup@. ...@@ -13,7 +13,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Utils.Prefix where module Gargantext.Core.Utils.Prefix where
import Prelude import Prelude
......
...@@ -49,13 +49,13 @@ import qualified Opaleye.Internal.Unpackspec() ...@@ -49,13 +49,13 @@ import qualified Opaleye.Internal.Unpackspec()
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Gargantext.Types import Gargantext.Core.Types
import Gargantext.Types.Node (NodeType) import Gargantext.Core.Types.Node (NodeType)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.NodeNode import Gargantext.Database.NodeNode
import Gargantext.Database.NodeNodeNgram import Gargantext.Database.NodeNodeNgram
import Gargantext.Database.Node import Gargantext.Database.Node
import Gargantext.Database.Queries import Gargantext.Database.Queries
import Gargantext.Utils.Prefix (unPrefix)
-- import Gargantext.Database.NodeNgram -- import Gargantext.Database.NodeNgram
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -29,8 +29,8 @@ import Database.PostgreSQL.Simple.FromField ( Conversion ...@@ -29,8 +29,8 @@ import Database.PostgreSQL.Simple.FromField ( Conversion
) )
import Prelude hiding (null, id, map, sum) import Prelude hiding (null, id, map, sum)
import Gargantext.Types import Gargantext.Core.Types
import Gargantext.Types.Node (NodeType) import Gargantext.Core.Types.Node (NodeType)
import Gargantext.Database.Queries import Gargantext.Database.Queries
import Gargantext.Prelude hiding (sum) import Gargantext.Prelude hiding (sum)
......
...@@ -22,7 +22,7 @@ Portability : POSIX ...@@ -22,7 +22,7 @@ Portability : POSIX
module Gargantext.Database.Queries where module Gargantext.Database.Queries where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Types (Limit, Offset, NodePoly) import Gargantext.Core.Types (Limit, Offset, NodePoly)
import Data.Maybe (Maybe, maybe) import Data.Maybe (Maybe, maybe)
import Control.Arrow ((>>>)) import Control.Arrow ((>>>))
import Control.Applicative ((<*>)) import Control.Applicative ((<*>))
......
...@@ -24,7 +24,6 @@ commentary with @some markup@. ...@@ -24,7 +24,6 @@ commentary with @some markup@.
module Gargantext.Database.User where module Gargantext.Database.User where
import Gargantext.Prelude
import GHC.Show(Show(..)) import GHC.Show(Show(..))
import Data.Eq(Eq(..)) import Data.Eq(Eq(..))
import Data.Time (UTCTime) import Data.Time (UTCTime)
...@@ -40,6 +39,8 @@ import Opaleye ...@@ -40,6 +39,8 @@ import Opaleye
-- Functions only -- Functions only
import Data.List (find) import Data.List (find)
import Gargantext.Prelude
data UserLight = UserLight { userLight_id :: Int data UserLight = UserLight { userLight_id :: Int
, userLight_username :: Text , userLight_username :: Text
......
module Data.Gargantext.Error where
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Network.HTTP.Client (HttpException)
import System.Exit (ExitCode (..), exitWith)
import System.IO (stderr)
--import Text.Parsec.Error
--import Text.Parsec.Pos hiding (Line)
data GargError = GargIOError String IOError
| GargHttpError String HttpException
| GargParseError String
| GargNgramsError String
| GargDatabaseError String
deriving (Show, Typeable, Generic)
instance Exception PandocError
-- | Handle GargError by exiting with an error message.
handleError :: Either GargError a -> IO a
handleError (Right r) = pure r
handleError (Left e) =
case e of
GargIOError _ err' -> ioError err'
GargHttpError u err' -> err 61 $
"Could not fetch " ++ u ++ "\n" ++ show err'
GargParseError s -> err 64 s
_ s -> err 0 s
err :: Int -> String -> IO a
err exitCode msg = do
UTF8.hPutStrLn stderr msg
exitWith $ ExitFailure exitCode
return undefined
...@@ -15,38 +15,18 @@ n non negative integer ...@@ -15,38 +15,18 @@ n non negative integer
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text ( module Gargantext.Text.Letters module Gargantext.Text
--, module Gargantext.Text.Hetero where
, module Gargantext.Text.CoreNLP
, module Gargantext.Text.Parser
, module Gargantext.Text.Occurrences
, module Gargantext.Text.TextMining
, module Gargantext.Text.Metrics
, Ngrams(..), ngrams, occ, sumOcc, text2fis, clean
, ListName(..), equivNgrams, isGram, sentences
, ngramsTest
) where
import Gargantext.Text.Letters
--import Gargantext.Text.Hetero
import Gargantext.Text.CoreNLP
import Gargantext.Text.Parser
import Gargantext.Text.Occurrences
import Gargantext.Text.TextMining
--import Gargantext.Text.Words
import Gargantext.Text.Metrics
import qualified Gargantext.Text.FrequentItemSet as FIS
-----------------------------------------------------------------
import Data.List (sort)
import Data.Char (Char, isAlphaNum, isSpace) import Data.Char (Char, isAlphaNum, isSpace)
import Data.Text (Text, filter, toLower, split, lines, concat) import Data.Text (Text, filter, toLower, split, splitOn)
import qualified Data.Text as DT import qualified Data.Text as DT
import Data.Text.IO (readFile) --import Data.Text.IO (readFile)
import Data.Set (Set)
import qualified Data.Set as S
import Data.Map.Strict (Map import Data.Map.Strict (Map
, empty , empty
, insertWith, unionWith , insertWith, unionWith
...@@ -55,25 +35,38 @@ import Data.Map.Strict (Map ...@@ -55,25 +35,38 @@ import Data.Map.Strict (Map
) )
import qualified Data.Map.Strict as M (filter) import qualified Data.Map.Strict as M (filter)
import Data.Foldable (foldl') import Data.Foldable (foldl')
import Gargantext.Prelude hiding (filter)
-- Maybe useful later: -----------------------------------------------------------------
--import NLP.Stemmer (stem, Stemmer(..)) import Gargantext.Text.Ngrams.Stem.En
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..)) import qualified Gargantext.Text.Metrics.FrequentItemSet as FIS
import Gargantext.Prelude hiding (filter)
-----------------------------------------------------------------
data ListName = Stop | Candidate | Graph data ListName = Stop | Candidate | Graph
deriving (Show, Eq) deriving (Show, Eq)
data Ngrams = Ngrams { _ngramsNgrams :: [Text]
, _ngramsStem :: [Text]
, _ngramsListName :: Maybe ListName data Ngroup = Ngroup { _ngroup_label :: Ngrams
, _ngroup_ngrams :: [Ngrams]
} deriving (Show)
data Ngrams = Ngrams { _ngrams_label :: [Text]
, _ngrams_stem :: Set Text
} deriving (Show) } deriving (Show)
text2ngrams :: Text -> Ngrams
text2ngrams txt = Ngrams txt' (S.fromList $ map stem txt')
where
txt' = splitOn " " txt
equivNgrams :: Ngrams -> Ngrams -> Bool equivNgrams :: Ngrams -> Ngrams -> Bool
equivNgrams (Ngrams n1 s1 _) (Ngrams n2 s2 _) equivNgrams (Ngrams _ s1) (Ngrams _ s2) = s1 `S.isSubsetOf` s2
= (sort n1) == (sort n2) || (sort s1) == (sort s2) || s2 `S.isSubsetOf` s1
type Occ = Int type Occ = Int
--type Index = Int --type Index = Int
...@@ -149,20 +142,25 @@ sentences txt = split isStop txt ...@@ -149,20 +142,25 @@ sentences txt = split isStop txt
isStop :: Char -> Bool isStop :: Char -> Bool
isStop c = c `elem` ['.','?','!'] isStop c = c `elem` ['.','?','!']
---- | https://en.wikipedia.org/wiki/Text_mining
-- | Tests --testText :: Text
ngramsTest fp = ws --testText = DT.pack "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
where --
txt = concat <$> lines <$> clean <$> readFile fp --
-- | Number of sentences --
ls = sentences <$> txt ---- | Tests
-- | Number of monograms used in the full text ----ngramsTest :: [Text]
ws = ngrams <$> txt --ngramsTest = ocs
-- | stem ngrams -- where
-- TODO -- --txt = concat <$> lines <$> clean <$> readFile filePath
-- group ngrams -- txt = clean $ testText
ocs = occ <$> ws -- -- | Number of sentences
-- ls = sentences $ txt
-- -- -- | Number of monograms used in the full text
-- ws = ngrams $ txt
-- -- | stem ngrams
-- -- TODO
-- -- group ngrams
-- ocs = occ $ ws
--
--
{-|
Module : Gargantext.Analysis
Description : Gargantext Analysis
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Analysis
where
import Gargantext.Prelude (undefined, IO(), Int())
-- import qualified Data.Text.Lazy as DTL
import Data.Text
import Opaleye (Column, PGInt4)
--import Data.Map as DM
--import Data.Vector as DV
-- | Simple function to count Occurrences in a context of text.
occOfDocument :: Column PGInt4 -> Text -> IO Int
occOfDocument = undefined
--occOfDocument c_id txt = do
-- docs <- pm (hyperdataDocument_Abstract . node_hyperdata) <$> getCorpusDocument c_id
-- let occs = pm (\x -> maybe "" identity x) docs
-- let result = case sequence $ pm (parseOccurrences txt) occs of
-- -- TODO find a way to get nice message d'errors (file, function, line)
-- Left str -> error $ "[ERRROR] at file/function/line" ++ str
-- Right xs -> xs
-- pure (sum result)
data Occurrences a b = Map a b
{-|
Module : Gargantext.Text.Letters
Description : Ngrams.Letters module
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Sugar to work on letters with Text.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Letters where
import qualified Data.Text.Lazy as DTL
-- import qualified Data.Text.Lazy.IO as DTLIO
import Gargantext.Prelude
-- | /O(n)/ Breaks a 'Text' up into each Text list of chars.
-- from slower to faster:
letters :: DTL.Text -> [DTL.Text]
letters text = DTL.chunksOf 1 text
letters' :: DTL.Text -> [DTL.Text]
letters' text = DTL.splitOn "#" $ DTL.intersperse '#' text
letters'' :: DTL.Text -> [DTL.Text]
letters'' = DTL.foldr (\ch xs -> DTL.singleton ch : xs) []
{-| {-|
Module : Gargantext.Text.Metrics Module : Gargantext.Text.Metrics
Description : Short description Description : All parsers of Gargantext in one file.
Copyright : (c) Some Guy, 2013 Copyright : (c) CNRS, 2017 - present
Someone Else, 2014 License : AGPL + CECILL v3
License : GPL-3 Maintainer : team@gargantext.org
Maintainer : sample@email.com
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
...@@ -13,6 +12,7 @@ Mainly reexport functions in @Data.Text.Metrics@ ...@@ -13,6 +12,7 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Metrics (levenshtein module Gargantext.Text.Metrics (levenshtein
, levenshteinNorm , levenshteinNorm
, damerauLevenshtein , damerauLevenshtein
...@@ -22,12 +22,12 @@ module Gargantext.Text.Metrics (levenshtein ...@@ -22,12 +22,12 @@ module Gargantext.Text.Metrics (levenshtein
, hamming , hamming
) where ) where
import Gargantext.Prelude
import Data.Text (Text) import Data.Text (Text)
import GHC.Real (Ratio) import GHC.Real (Ratio)
import qualified Data.Text.Metrics as DTM import qualified Data.Text.Metrics as DTM
import Gargantext.Prelude
{- * Example de titre {- * Example de titre
-} -}
......
{-| {-|
Module : Gargantext.Text.FrequentItemSet Module : Gargantext.Text.Metrics.FrequentItemSet
Description : Ngrams tools Description : Ngrams tools
Copyright : (c) CNRS, 2018 Copyright : (c) CNRS, 2018
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS) ...@@ -13,7 +13,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.FrequentItemSet module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size ( Fis, Size
, occ, cooc , occ, cooc
, all, between , all, between
......
{-|
Module : Gargantext.
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Hetero where module Gargantext.Text.Hetero where
import GHC.Real as R import GHC.Real as R
......
{-| {-|
Module : Gargantext.Text.Occurrences Module : Gargantext.Text.Metrics.Occurrences
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Occurrences where module Gargantext.Text.Metrics.Occurrences where
import Gargantext.Prelude import Gargantext.Prelude
......
{-| {-|
Module : Gargantext.Text.TFICF Module : Gargantext.Text.Metrics.TFICF
Description : TFICF Ngrams tools Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,7 +15,7 @@ Definition of TFICF ...@@ -15,7 +15,7 @@ Definition of TFICF
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.TFICF where module Gargantext.Text.Metrics.TFICF where
import GHC.Generics (Generic) import GHC.Generics (Generic)
......
{-|
Module : Gargantext.Parsers
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Ngrams
where
{-| {-|
Module : Gargantext.Text.List Module : Gargantext.Text.Ngrams.Lists
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,19 +13,20 @@ commentary with @some markup@. ...@@ -13,19 +13,20 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.List where module Gargantext.Text.Ngrams.Lists
where
import Data.Maybe
import Data.List (filter) --import Data.Maybe
import Gargantext.Text --import Data.List (filter)
import Gargantext.Prelude --import Gargantext.Text
--import Gargantext.Prelude
graph :: [Ngrams] -> [Ngrams] --
graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs --graph :: [Ngrams] -> [Ngrams]
--graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs
candidates :: [Ngrams] -> [Ngrams] --
candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs --candidates :: [Ngrams] -> [Ngrams]
--candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs
stop :: [Ngrams] -> [Ngrams] --
stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs --stop :: [Ngrams] -> [Ngrams]
--stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
{-|
Module : Gargantext.Text.Ngrams.PosTagging
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Ngrams.PosTagging
where
{-| {-|
Module : Gargantext.Text.CoreNLP Module : Gargantext.Text.Ngrams.PosTagging.CoreNLP
Description : CoreNLP module Description : CoreNLP module
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -16,18 +16,20 @@ Portability : POSIX ...@@ -16,18 +16,20 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.Text.CoreNLP where module Gargantext.Text.Ngrams.PosTagging.CoreNLP
where
import Data.Aeson.TH (deriveJSON)
import GHC.Generics import GHC.Generics
import Data.Monoid ((<>))
import GHC.Show (Show(..)) import GHC.Show (Show(..))
import Gargantext.Types.Main (Language(..)) import Data.Aeson.TH (deriveJSON)
import Gargantext.Prelude import Data.Monoid ((<>))
import Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple import Network.HTTP.Simple
...@@ -80,7 +82,7 @@ $(deriveJSON (unPrefix "_") ''Sentences) ...@@ -80,7 +82,7 @@ $(deriveJSON (unPrefix "_") ''Sentences)
-- --
corenlp :: Language -> Text -> IO Sentences corenlp :: Lang -> Text -> IO Sentences
corenlp lang txt = do corenlp lang txt = do
let properties = case lang of let properties = case lang of
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}" EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
...@@ -99,7 +101,7 @@ corenlp lang txt = do ...@@ -99,7 +101,7 @@ corenlp lang txt = do
-- Named Entity Recognition example -- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter." -- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]] -- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: (Token -> t) -> Language -> Text -> IO [[(Text, t)]] tokenWith :: (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
tokenWith f lang s = map (map (\t -> (_tokenWord t, f t))) <$> map _sentenceTokens <$> _sentences <$> corenlp lang s tokenWith f lang s = map (map (\t -> (_tokenWord t, f t))) <$> map _sentenceTokens <$> _sentences <$> corenlp lang s
......
{-| {-|
Module : Gargantext.Text.Lang.En Module : Gargantext.Text.Ngrams.PosTagging.Lang.En
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,8 @@ commentary with @some markup@. ...@@ -14,7 +14,8 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Lang.En (selectNgrams, groupNgrams, textTest) where module Gargantext.Text.Ngrams.PosTagging.Lang.En (selectNgrams, groupNgrams, textTest)
where
import Gargantext.Prelude import Gargantext.Prelude
import Data.Text (Text) import Data.Text (Text)
......
{-| {-|
Module : Gargantext.Text.Lang.Fr Module : Gargantext.Text.Ngrams.PosTagging.Lang.Fr
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -14,7 +14,7 @@ commentary with @some markup@. ...@@ -14,7 +14,7 @@ commentary with @some markup@.
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Lang.Fr (selectNgrams, groupNgrams, textTest) module Gargantext.Text.Ngrams.PosTagging.Lang.Fr (selectNgrams, groupNgrams, textTest)
where where
import Gargantext.Prelude import Gargantext.Prelude
......
{-| {-|
Module : Gargantext.Text.Parser Module : Gargantext.Text.Ngrams.PosTagging.Parser
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -15,15 +15,16 @@ commentary with @some markup@. ...@@ -15,15 +15,16 @@ commentary with @some markup@.
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Text.Parser where module Gargantext.Text.Ngrams.PosTagging.Parser
where
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.CoreNLP
import Data.Text hiding (map) import Data.Text hiding (map)
import Gargantext.Types.Main (Language(..)) import Gargantext.Core (Lang(..))
import qualified Gargantext.Text.Lang.En as En import Gargantext.Text.Ngrams.PosTagging.CoreNLP
import qualified Gargantext.Text.Lang.Fr as Fr import qualified Gargantext.Text.Ngrams.PosTagging.Lang.En as En
import qualified Gargantext.Text.Ngrams.PosTagging.Lang.Fr as Fr
type SNgrams = (Text, Text, Text) type SNgrams = (Text, Text, Text)
...@@ -45,11 +46,11 @@ type SNgrams = (Text, Text, Text) ...@@ -45,11 +46,11 @@ type SNgrams = (Text, Text, Text)
-- TODO for scientific papers: add maesures -- TODO for scientific papers: add maesures
-- TODO add the p score regex -- TODO add the p score regex
extractNgrams :: Language -> Text -> IO [[SNgrams]] extractNgrams :: Lang -> Text -> IO [[SNgrams]]
extractNgrams lang s = map (groupNgrams lang) <$> extractNgrams' lang s extractNgrams lang s = map (groupNgrams lang) <$> extractNgrams' lang s
extractNgrams' :: Language -> Text -> IO [[SNgrams]] extractNgrams' :: Lang -> Text -> IO [[SNgrams]]
extractNgrams' lang t = map (map token2text) extractNgrams' lang t = map (map token2text)
<$> map _sentenceTokens <$> map _sentenceTokens
<$> _sentences <$> _sentences
...@@ -58,13 +59,13 @@ extractNgrams' lang t = map (map token2text) ...@@ -58,13 +59,13 @@ extractNgrams' lang t = map (map token2text)
-- | This function selects ngrams according to grammars specific -- | This function selects ngrams according to grammars specific
-- of each language. -- of each language.
-- In english, JJ is ADJectiv in french. -- In english, JJ is ADJectiv in french.
selectNgrams :: Language -> [SNgrams] -> [SNgrams] selectNgrams :: Lang -> [SNgrams] -> [SNgrams]
selectNgrams EN = En.selectNgrams selectNgrams EN = En.selectNgrams
selectNgrams FR = Fr.selectNgrams selectNgrams FR = Fr.selectNgrams
-- | This function analyze and groups (or not) ngrams according to -- | This function analyze and groups (or not) ngrams according to
-- grammars specific of each language. -- grammars specific of each language.
groupNgrams :: Language -> [SNgrams] -> [SNgrams] groupNgrams :: Lang -> [SNgrams] -> [SNgrams]
groupNgrams EN = En.groupNgrams groupNgrams EN = En.groupNgrams
groupNgrams FR = Fr.groupNgrams groupNgrams FR = Fr.groupNgrams
{-|
Module : Gargantext.Text.Ngrams.Stem
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
module Gargantext.Text.Ngrams.Stem
where
import Data.Text (Text)
import qualified Data.Text as DT
import qualified NLP.Stemmer as N
import Gargantext.Core (Lang(..))
-- (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
stem :: Lang -> Text -> Text
stem lang = DT.pack . N.stem lang' . DT.unpack
where
lang' = case lang of
EN -> N.English
FR -> N.French
{-| {-|
Module : Gargantext. Module : Gargantext.Text.Ngrams.Stem.En
Description : Porter Algorithm Implementation purely Haskell Description : Porter Algorithm Implementation purely Haskell
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -17,7 +17,7 @@ Adapted from: ...@@ -17,7 +17,7 @@ Adapted from:
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Stem.En module Gargantext.Text.Ngrams.Stem.En
where where
import Control.Monad import Control.Monad
......
{-| {-|
Module : Gargantext.Text.Token.Text Module : Gargantext.Text.Ngrams.Token.Text
Description : Description :
Copyright : (c) CNRS, 2017-Present Copyright : (c) Grzegorz Chrupała first, after: CNRS, 2018-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
...@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview ...@@ -13,7 +13,7 @@ Inspired from https://bitbucket.org/gchrupala/lingo/overview
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Token.Text module Gargantext.Text.Ngrams.Token.Text
( EitherList(..) ( EitherList(..)
, Tokenizer , Tokenizer
, tokenize , tokenize
...@@ -28,7 +28,7 @@ module Gargantext.Text.Token.Text ...@@ -28,7 +28,7 @@ module Gargantext.Text.Token.Text
, contractions , contractions
, negatives , negatives
) )
where where
import qualified Data.Char as Char import qualified Data.Char as Char
import Data.Maybe import Data.Maybe
......
{-| {-|
Module : Gargantext.Parsers Module : Gargantext.Text.Parsers
Description : All parsers of Gargantext in one file. Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3 License : AGPL + CECILL v3
......
{-|
Module : Gargantext.Text.TextMining
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.TextMining where
import Gargantext.Prelude
import Data.Ord(Ordering(LT,GT), compare)
import Data.Text (pack)
import Data.Bool (otherwise)
import Data.Map (empty, Map, insertWith, toList)
import Data.List (foldl, foldl')
import qualified Data.List as L
sortGT :: (Ord a, Ord b) => (a, b) -> (a, b) -> Ordering
sortGT (a1, b1) (a2, b2)
| a1 < a2 = GT
| a1 > a2 = LT
| a1 == a2 = compare b1 b2
sortGT (_, _) (_, _) = panic (pack "What is this case ?")
--histogram :: Ord a => [a] -> [(a, Int)]
--histogram = map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
--histogram = sortGT Prelude.. $ map (head &&& length) Prelude.. group Prelude.. sort Prelude.. words
countElem :: (Ord k) => Data.Map.Map k Int -> k -> Data.Map.Map k Int
countElem m e = Data.Map.insertWith (\n o -> n + o) e 1 m
freqList :: (Ord k) => [k] -> Data.Map.Map k Int
freqList = foldl countElem Data.Map.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
merge :: [a] -> [a] -> [a]
merge [] ys = ys
merge (x:xs) ys = x:merge ys xs
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
countYear :: [Integer] -> Map Integer Integer
countYear [] = empty
countYear (x:xs) = insertWith (+) x 1 (countYear xs)
countYear' :: [Integer] -> Map Integer Integer
countYear' (xs) = foldl' (\x y -> insertWith (+) y 1 x) empty xs
-- Word2Vec
-- Word Vector in a Field
module Data.Gargantext.Utils.Chronos where
import Data.Maybe (fromJust, isJust, isNothing)
import qualified Data.Time as DT
import qualified Data.UTC as DU
import Data.Time
import Data.Time.Clock.POSIX
import Text.Regex
parseDate :: String -> Maybe [String]
parseDate d = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDate' :: Maybe [String] -> (Integer, Int, Int)
getDate' d
| isJust d == True = toGregorian $ fromGregorian (read year) (read month) (read day)
| otherwise = toGregorian $ fromGregorian 2015 1 1
where
Just [day, month, year] = d
getDate :: String -> (Integer, Int, Int)
getDate = getDate' . parseDate
--getDateDay :: Maybe [String] -> Day
--getDateDay d = fromGregorian (read year) (read month) (read day)
-- where Just [day, month, year] = matchRegex (mkRegex "(.*)/(.*)/(.*)") d
getDateDay' :: Maybe [String] -> Day
getDateDay' d
| isJust d == True = fromGregorian (read year) (read month) (read day)
| otherwise = fromGregorian 2015 1 1
where Just [day, month, year] = d
getDateDay :: String -> Day
getDateDay = getDateDay' . parseDate
getDateUTC :: String -> String
getDateUTC d = show $ DT.UTCTime (getDateDay d) (DT.timeOfDayToTime $ DT.TimeOfDay 0 0 0)
getYear :: String -> String
getYear date = s where
(y, m, d) = getDate date
s = show y
getMonth :: String -> String
getMonth date = s where
(y, m, d) = getDate date
s = show m
getDay :: String -> String
getDay date = s where
(y, m, d) = getDate date
s = show d
--for Dates exported via xls2csv tool
type MT = Maybe (DU.Local DU.DateTime)
type MS = Maybe String
--getDate'' :: String -> String
--getDate'' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderRfc3339 :: MS
-- d = fromJust da
--
--getDate''' :: String -> String
--getDate''' gd = d where
-- start = "1900-01-01T00:00:00Z"
-- da = (DU.parseRfc3339 start :: MT) >>= DU.addDays ( (read gd :: Integer) -2) >>= DU.renderIso8601CalendarDate :: MS
-- d = fromJust da
--
--date2greg :: String ->
date2greg date = (y, m, d) where
(y, m, d) = DT.toGregorian $ DT.addDays ((read date :: Integer) -2) $ DT.utctDay (read "1900-01-01 00:00:00" :: DT.UTCTime)
getYear' :: String -> String
getYear' date = s where
(y, m, d) = date2greg date
s = show y
getMonth' :: String -> String
getMonth' date = s where
(y, m, d) = date2greg date
s = show m
getDay' :: String -> String
getDay' date = s where
(y, m, d) = date2greg date
s = show d
-- DEFINITIONS as SPECS
-- (Engineering axioms for Gargantext)
------------------------------------------------------------------------
-- From file to corpus
------------------------------------------------------------------------
-- > A Corpus is a list of Documents
data Corpus = [Document]
-- > A Document should have a date, some text and a maybe a language.
-- > Remarks :
-- > If no date then force one ?
-- > Analyze either text or numbers
-- > only one language per document
data Document = Document { date :: UTCTime
, uce :: Map Text $ Either (Maybe Text) (Maybe Double)
, lang :: Maybe Language
}
parseFiles :: Maybe ParserType -> [File] -> Corpus
parseFiles = undefined
-- This function exists already (in Python)
parseFile' :: ParserType -> File -> Maybe [Document]
parseFile' = undefined
-- This function does not exist yet
parseFile :: Maybe ParserType -> File -> Maybe [Document]
parseFile parserType file = documents
where
documents = case parserType of
Nothing -> case guessParserType file of
Nothing -> askUser "Answer to the question with link to $doc"
Just parserType' -> parseFile (Just parserType') file
Just parserType'' -> case parserType'' of
UnsupportedYet -> askUser "Not supported yet, which priority ?"
otherwise -> parseFile' parserType'' file
data ParserType = RIS | ISI | XML | CSV | Europresse | Book | UnsupportedYet
guessParserType :: File -> Maybe ParserType
guessParserType = undefined
------------------------------------------------------------------------
-- What kind of interactions with our users ?
------------------------------------------------------------------------
-- Question is Text only
type Question = Text
-- Possible Answers:
data Answer = ClosedAnswer | NumAnswer | OpenAnswer
-- Definitions of the Answers
type ClosedAnswer = Bool
type OpenAnswer = Text
type NumAnswer = Int
-- Un formulaire est un mapping entre question et peut-être une réponse
-- Un formulaire vide a Nothing au champs (Maybe Answer)
-- Une question répondue a la valeur (Just Response)
type Formular = Map Question (Maybe Answer)
askUser :: Question -> ClosedAnswer
askUser = undefined
data Advice = BugReport | WishList
askUser' :: Question -> Advice
askUser' question = case askUser question of
True -> BugReport
False -> WishList
------------------------------------------------------------------------
-- Specs for Lang Detection
------------------------------------------------------------------------
data Language = English | French
tagDoc :: Document -> Ngrams
tagDoc doc = ngrams
where
ngrams = case lang doc of
Nothing -> case guessLang doc of
Nothing -> tag
------------------------------------------------------------------------
-- Specs for ngrams Worflow
------------------------------------------------------------------------
module Data.Gargantext.Utils.SaveGetHash where
import System.FilePath (addExtension, joinPath)
import Data.ByteString.Lazy.Internal (ByteString)
import Data.List (elem, intersperse, insert)
import Data.List.Extra (chunksOf)
import Data.Digest.Pure.MD5 (md5)
import System.Directory (getDirectoryContents, createDirectory, findFile, createDirectoryIfMissing)
import Control.Monad (foldM)
import Data.List (splitAt)
import Data.ByteString.Lazy.Internal (packChars)
import qualified Data.ByteString.Lazy as BL
import Codec.Compression.Zlib (compress, decompress)
data Config = Config {
root :: String
, chunkSize :: Int
, compression :: Bool
} deriving Show
conf = Config {
root="/tmp/robot"
, chunkSize=2
, compression = True
}
chunkUrl :: Int -> ByteString -> [[Char]]
chunkUrl a url = chunksOf a $ show $ md5 url
-- replace it with createDirectoryIfMissing
existOrCreate :: [[Char]] -> FilePath -> IO [[Char]]
existOrCreate path_ dir = do
let path = joinPath path_
let returnPath = return $ path_ ++ [dir]
is <- elem dir <$> getDirectoryContents path -- ?
case is of
True -> do
returnPath
False -> do
createDirectory $ path ++ "/" ++ dir
returnPath
doPath :: [[Char]] -> [FilePath] -> IO [[Char]]
doPath root path = foldM (\x y -> existOrCreate x y) root path
splitAt' :: Int -> Int -> [Char] -> ([Char], [Char], [Char])
splitAt' i1 i2 x = (a, b, c) where
(a, a') = splitAt i1 x
(b, c) = splitAt i2 a'
-- ne pas écraser le fichier s'il existe
-- spliter l'url proprement
saveFile :: ByteString -> String -> IO String
saveFile url'' file = do
let url' = chunkUrl (chunkSize conf) url''
let url = init url'
-- add extension according to the filetype
let filename = Prelude.foldl addExtension (last url') ["html", "zlib"]
doPath [(root conf)] url
let path = (root conf) ++ "/" ++ joinPath url ++ "/" ++ filename
--case (findFile ["/tmp/sdfs"] "file.hmtl.zib"
-- Nothing -> create
-- _ -> change name
case (compression conf) of
True -> BL.writeFile path (compress $ packChars file)
False -> writeFile path file
return path
getFile :: FilePath -> IO ByteString
getFile path = do
case (compression conf) of
True -> decompress <$> BL.readFile path
False -> packChars <$> Prelude.readFile path
-- resources
-- add Resource
-- levensthein distance...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment