Commit 45c3bb43 authored by Alexandre Delanoë

[TYPES] Terms and Human Lang types fusion.

parent 15511563
...@@ -55,15 +55,18 @@ workflow lang path = do ...@@ -55,15 +55,18 @@ workflow lang path = do
-- Text <- IO Text <- FilePath -- Text <- IO Text <- FilePath
text <- readFile path text <- readFile path
-- context :: Text -> [Text]
let contexts = splitBy (Sentences 5) text let contexts = splitBy (Sentences 5) text
-- Context :: Text -> [Text]
-- Contexts = Paragraphs n | Sentences n | Chars n
myterms <- extractTerms Mono lang contexts myterms <- extractTerms (Mono lang) contexts
-- myterms <- extractTerms (Mono lang) contexts # filter (\t -> not . elem t stopList) -- myterms # filter (\t -> not . elem t stopList)
-- # groupBy (Stem|GroupList) -- # groupBy (Stem|GroupList)
printDebug "myterms" (sum $ map length myterms) printDebug "myterms" (sum $ map length myterms)
-- Building the map list -- Building the map list
-- compute copresences of terms
-- Cooc = Map (Term, Term) Int
let myCooc1 = cooc myterms let myCooc1 = cooc myterms
printDebug "myCooc1" (M.size myCooc1) printDebug "myCooc1" (M.size myCooc1)
......
...@@ -180,7 +180,7 @@ metrics_sentences_Test = metrics_sentences == metrics_sentences' ...@@ -180,7 +180,7 @@ metrics_sentences_Test = metrics_sentences == metrics_sentences'
-} -}
metrics_terms :: IO [[Terms]] metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text metrics_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) metrics_text
-- | Occurrences -- | Occurrences
{- {-
......
...@@ -42,23 +42,23 @@ import Gargantext.Core.Types ...@@ -42,23 +42,23 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms) import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoterms') import Gargantext.Text.Terms.Mono (monoterms')
data TermType = Mono | Multi | MonoMulti data TermType lang = Mono lang | Multi lang | MonoMulti lang
-- remove Stop Words -- remove Stop Words
-- map (filter (\t -> not . elem t)) $ -- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiding mapM from end user). -- | Sugar to extract terms from text (hiding mapM from end user).
extractTerms :: Traversable t => TermType -> Lang -> t Text -> IO (t [Terms]) extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms termType lang = mapM (terms termType lang) extractTerms termTypeLang = mapM (terms termTypeLang)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Terms from Text -- | Terms from Text
-- Mono : mono terms -- Mono : mono terms
-- Multi : multi terms -- Multi : multi terms
-- MonoMulti : mono and multi -- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet) -- TODO : multi terms should exclude mono (intersection is not empty yet)
terms :: TermType -> Lang -> Text -> IO [Terms] terms :: TermType Lang -> Text -> IO [Terms]
terms Mono lang txt = pure $ monoterms' lang txt terms (Mono lang) txt = pure $ monoterms' lang txt
terms Multi lang txt = multiterms lang txt terms (Multi lang) txt = multiterms lang txt
terms MonoMulti lang txt = terms Multi lang txt terms (MonoMulti lang) txt = terms (Multi lang) txt
------------------------------------------------------------------------ ------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment