{-|
Module      : Gargantext.Core.Text.Flow
Description : Server API
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

From text to viz, all the flow of texts in Gargantext.

-}


15
module Gargantext.Core.Text.Flow
16 17
  where

18
import qualified Data.Text as T
19
--import Data.Text.IO (readFile)
20
import Database.PostgreSQL.Simple (Connection)
21
import GHC.IO (FilePath)
22
--import Gargantext.Core (Lang)
23
import Gargantext.Core.Types (CorpusId)
24

25 26 27 28 29 30 31 32
{-
  ____                             _            _
 / ___| __ _ _ __ __ _  __ _ _ __ | |_ _____  _| |_
| |  _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | ||  __/>  <| |_
 \____|\__,_|_|  \__, |\__,_|_| |_|\__\___/_/\_\\__|
                 |___/
-}
33

Alexandre Delanoë's avatar
Alexandre Delanoë committed
34

35
-- | Small hard-coded list of example sentences, one context per element.
--
-- The sentences cover three themes visible in the literals themselves:
-- animals (dog, bird), objects (table, pen) and humans (girl, boy).
--
-- NOTE(review): @"The dog is an animal."@ appears twice; it is kept as-is
-- here. Confirm whether the repetition is intentional before removing it.
--
-- The literals are typed as 'T.Text', which presumably relies on
-- @OverloadedStrings@ being enabled outside this file (no pragma is
-- visible here) -- TODO confirm.
contextText :: [T.Text]
contextText = ["The dog is an animal."
              ,"The bird is an animal."
              ,"The dog is an animal."
              ,"The animal is a bird or a dog ?"
              ,"The table is an object."
              ,"The pen is an object."
              ,"The object is a pen or a table ?"
              ,"The girl is a human."
              ,"The boy  is a human."
              ,"The boy or the girl are human."
              ]


49
-- | Control the flow of text: each constructor names one kind of input
-- a text flow can be started from.
data TextFlow = CSV FilePath
              -- ^ Path of a CSV file.
              | FullText FilePath
              -- ^ Path of a file treated as one full text.
              | Contexts [T.Text]
              -- ^ Contexts given directly as a list of texts.
              | DBV3 Connection CorpusId
              -- ^ A PostgreSQL connection together with a corpus identifier.
              | Query T.Text
              -- ^ A textual query. NOTE(review): the disabled @textFlow@
              -- code in this file marks this case as not supported yet.
55

56
{-
57 58
textFlow :: TermType Lang -> TextFlow -> IO Graph
textFlow termType workType = do
59
  contexts <- case workType of
60 61 62
                FullText path -> splitBy (Sentences 5) <$> readFile path
                CSV      path -> readCsvOn [csv_title, csv_abstract] path
                Contexts ctxt -> pure ctxt
63
                DBV3 con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (_node_hyperdata n)  <> hyperdataDocumentV3_abstract (_node_hyperdata n))<$> runReaderT (getDocumentsV3WithParentId corpusId) con
64
                _             -> undefined -- TODO Query not supported
65

66 67 68 69 70
  textFlow' termType contexts


textFlow' :: TermType Lang -> [T.Text] -> IO Graph
textFlow' termType contexts = do
71 72
  -- Context :: Text -> [Text]
  -- Contexts = Paragraphs n | Sentences n | Chars n
73

74
  myterms <- extractTerms termType contexts
75
  -- TermsType = Mono | Multi | MonoMulti
76
  -- myterms # filter (\t -> not . elem t stopList)
77
  --         # groupBy (Stem|GroupList|Ontology)
78 79
  --printDebug "terms" myterms
  --printDebug "myterms" (sum $ map length myterms)
80 81

  -- Building the map list
82
  -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
83
  -- Cooc = Map (Term, Term) Int
84
  let myCooc1 = coocOn (_terms_label) myterms
85
  --printDebug "myCooc1 size" (M.size myCooc1)
86 87

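  -- Remove hapax (terms appearing only once) => lightening the matrix
  -- NOTE(review): the filter below keeps every count > 0, so a count of 1 is kept too; confirm the intended threshold.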
88
  let myCooc2 = Map.filter (>0) myCooc1
89 90
  --printDebug "myCooc2 size" (M.size myCooc2)
  --printDebug "myCooc2" myCooc2
91 92
  g <- cooc2graph myCooc2
  pure g
93
-}
94