Commit 3f8c2b19 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CODE/READ] with NP.

parent a1f70708
...@@ -115,6 +115,7 @@ library: ...@@ -115,6 +115,7 @@ library:
- servant-static-th - servant-static-th
- split - split
- stemmer - stemmer
- string-conversions
- swagger2 - swagger2
- tagsoup - tagsoup
- text-metrics - text-metrics
......
...@@ -22,7 +22,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters ...@@ -22,7 +22,7 @@ module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
, module Gargantext.Ngrams.TextMining , module Gargantext.Ngrams.TextMining
, module Gargantext.Ngrams.Metrics , module Gargantext.Ngrams.Metrics
, Ngrams(..), ngrams, occ, sumOcc, text2fis , Ngrams(..), ngrams, occ, sumOcc, text2fis
, NgramsList(..) , ListName(..), equivNgrams, isGram
--, module Gargantext.Ngrams.Words --, module Gargantext.Ngrams.Words
) where ) where
...@@ -40,7 +40,7 @@ import qualified Gargantext.Ngrams.FrequentItemSet as FIS ...@@ -40,7 +40,7 @@ import qualified Gargantext.Ngrams.FrequentItemSet as FIS
----------------------------------------------------------------- -----------------------------------------------------------------
import Data.List (sort) import Data.List (sort)
import Data.Char (Char, isAlpha, isSpace) import Data.Char (Char, isAlphaNum, isSpace)
import Data.Text (Text, words, filter, toLower) import Data.Text (Text, words, filter, toLower)
import Data.Map.Strict (Map import Data.Map.Strict (Map
, empty , empty
...@@ -58,32 +58,31 @@ import Gargantext.Prelude hiding (filter) ...@@ -58,32 +58,31 @@ import Gargantext.Prelude hiding (filter)
--import Language.Aspell.Options (ACOption(..)) --import Language.Aspell.Options (ACOption(..))
data NgramsList = Stop | Candidate | Graph data ListName = Stop | Candidate | Graph
deriving (Show, Eq) deriving (Show, Eq)
data Ngrams = Ngrams { _ngramsNgrams :: [Text] data Ngrams = Ngrams { _ngramsNgrams :: [Text]
, _ngramsStem :: [Text] , _ngramsStem :: [Text]
, _ngramsList :: Maybe NgramsList , _ngramsListName :: Maybe ListName
} deriving (Show) } deriving (Show)
instance Eq Ngrams where equivNgrams :: Ngrams -> Ngrams -> Bool
Ngrams n1 s1 _ == Ngrams n2 s2 _ = (sort n1) == (sort n2) || (sort s1) == (sort s2) equivNgrams (Ngrams n1 s1 _) (Ngrams n2 s2 _)
= (sort n1) == (sort n2) || (sort s1) == (sort s2)
type Occ = Int type Occ = Int
--type Index = Int --type Index = Int
-- Data Ngrams = Monograms | MultiGrams
ngrams :: Text -> [Text] ngrams :: Text -> [Text]
ngrams xs = monograms $ toLower $ filter isChar xs ngrams xs = monograms $ toLower $ filter isGram xs
monograms :: Text -> [Text] monograms :: Text -> [Text]
monograms = words monograms = words
-- TODO isGram :: Char -> Bool
-- 12-b isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/']
isChar :: Char -> Bool
isChar '-' = True
isChar '/' = True
isChar c = isAlpha c || isSpace c
-- | Compute the occurrences (occ) -- | Compute the occurrences (occ)
occ :: Ord a => [a] -> Map a Occ occ :: Ord a => [a] -> Map a Occ
...@@ -91,7 +90,7 @@ occ xs = foldl' (\x y -> insertWith (+) y 1 x) empty xs ...@@ -91,7 +90,7 @@ occ xs = foldl' (\x y -> insertWith (+) y 1 x) empty xs
-- TODO add groups and filter stops -- TODO add groups and filter stops
sumOcc :: Ord a => [Map a Occ] -> Map a Occ sumOcc :: Ord a => [Map a Occ] -> Map a Occ
sumOcc xs = foldl' (\x y -> unionWith (+) x y) empty xs sumOcc xs = foldl' (unionWith (+)) empty xs
--noApax :: Ord a => Map a Occ -> Map a Occ --noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m --noApax m = M.filter (>1) m
......
...@@ -28,8 +28,6 @@ import Gargantext.Prelude ...@@ -28,8 +28,6 @@ import Gargantext.Prelude
import Gargantext.Utils.Prefix (unPrefix) import Gargantext.Utils.Prefix (unPrefix)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.ByteString.Char8 as S8
import qualified Data.Yaml as Yaml
import Network.HTTP.Simple import Network.HTTP.Simple
...@@ -82,17 +80,6 @@ $(deriveJSON (unPrefix "_") ''Sentences) ...@@ -82,17 +80,6 @@ $(deriveJSON (unPrefix "_") ''Sentences)
-- --
corenlpPretty :: Text -> IO ()
corenlpPretty txt = do
url <- parseRequest "POST http://localhost:9000/?properties={\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
let request = setRequestBodyJSON txt url
response <- httpJSON request
-- putStrLn $ "The status code was: " ++
-- show (getResponseStatusCode response)
-- print $ getResponseHeader "Content-Type" response
S8.putStrLn $ Yaml.encode (getResponseBody response :: Sentences)
corenlp :: Language -> Text -> IO Sentences corenlp :: Language -> Text -> IO Sentences
corenlp lang txt = do corenlp lang txt = do
let properties = case lang of let properties = case lang of
...@@ -100,7 +87,7 @@ corenlp lang txt = do ...@@ -100,7 +87,7 @@ corenlp lang txt = do
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}" -- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}" FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyJSON txt url let request = setRequestBodyLBS (cs txt) url
response <- httpJSON request response <- httpJSON request
pure (getResponseBody response :: Sentences) pure (getResponseBody response :: Sentences)
......
...@@ -28,6 +28,7 @@ import Gargantext.Prelude ...@@ -28,6 +28,7 @@ import Gargantext.Prelude
type Size = Either Int (Int, Int) type Size = Either Int (Int, Int)
--data Size = Point | Segment
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1 -- | Occurrence is Frequent Item Set of size 1
......
module Gargantext.Ngrams.List where
import Data.Maybe
import Data.List (filter)
import Gargantext.Ngrams
import Gargantext.Prelude
graph :: [Ngrams] -> [Ngrams]
graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs
candidates :: [Ngrams] -> [Ngrams]
candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs
stop :: [Ngrams] -> [Ngrams]
stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
...@@ -52,6 +52,14 @@ import Gargantext.Parsers.WOS (wosParser) ...@@ -52,6 +52,14 @@ import Gargantext.Parsers.WOS (wosParser)
--import Gargantext.Prelude (pm) --import Gargantext.Prelude (pm)
--import Gargantext.Types.Main (ErrorMessage(), Corpus) --import Gargantext.Types.Main (ErrorMessage(), Corpus)
-- FIXME
--type Field = Text
type ParseError = String
--
--data Corpus = Corpus { _corpusErrors :: [ParseError]
-- , _corpusMap :: Map FilePath (Map Field Text)
-- }
-- | According to the format of Input file, -- | According to the format of Input file,
-- different parser are available. -- different parser are available.
...@@ -63,7 +71,6 @@ data FileFormat = WOS -- Implemented (ISI Format) ...@@ -63,7 +71,6 @@ data FileFormat = WOS -- Implemented (ISI Format)
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml -- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
-- TODO: to debug maybe add the filepath in error message -- TODO: to debug maybe add the filepath in error message
type ParseError = String
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]]) parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]])
......
...@@ -12,6 +12,7 @@ module Gargantext.Prelude ...@@ -12,6 +12,7 @@ module Gargantext.Prelude
, headMay , headMay
, module Text.Show , module Text.Show
, module Text.Read , module Text.Read
, cs
) )
where where
...@@ -30,6 +31,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer ...@@ -30,6 +31,7 @@ import Protolude ( Bool(True, False), Int, Double, Integer
, Eq, (==), (>=), (<=), (<>) , Eq, (==), (>=), (<=), (<>)
, (&&), (||), not , (&&), (||), not
, fst, snd, toS , fst, snd, toS
, elem
) )
-- TODO import functions optimized in Utils.Count -- TODO import functions optimized in Utils.Count
...@@ -43,6 +45,8 @@ import qualified Data.Vector as V ...@@ -43,6 +45,8 @@ import qualified Data.Vector as V
import Safe (headMay) import Safe (headMay)
import Text.Show (Show(), show) import Text.Show (Show(), show)
import Text.Read (Read()) import Text.Read (Read())
import Data.String.Conversions (cs)
--pf :: (a -> Bool) -> [a] -> [a] --pf :: (a -> Bool) -> [a] -> [a]
--pf = filter --pf = filter
......
...@@ -21,8 +21,8 @@ unPrefix prefix = defaultOptions ...@@ -21,8 +21,8 @@ unPrefix prefix = defaultOptions
-- | Lower case leading character -- | Lower case leading character
unCapitalize :: String -> String unCapitalize :: String -> String
unCapitalize [] = [] unCapitalize [] = []
--unCapitalize (c:cs) = toLower c : cs unCapitalize (c:cs) = toLower c : cs
unCapitalize cs = map toLower cs --unCapitalize cs = map toLower cs
-- | Remove given prefix -- | Remove given prefix
dropPrefix :: String -> String -> String dropPrefix :: String -> String -> String
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment