1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
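{-|
Module      : Gargantext.Text.Hetero
Description : Heterogeneity measures over corpus texts
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Computes a heterogeneity score from an occurrence map: the total number of
occurrences divided by the number of distinct counted items. The score can be
computed for a whole text or for fixed-size chunks of it.
-}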
module Gargantext.Text.Hetero where
import qualified Control.Exception as E
import GHC.Real as R

import Data.List.Split as S
import Data.Map as M
import Data.Set as S
import Database.PostgreSQL.Simple as PGS
import Opaleye.Internal.Column (Column)
import Opaleye.PGTypes (PGInt4)

import Gargantext.Database.Gargandb
import Gargantext.Database.Private
import Gargantext.Database.Simple
--import Gargantext.Utils.Chronos
import Gargantext.Text.Count (occurrences)
import Gargantext.Text.Words (cleanText)
--main = do
-- t <- getTextquery
-- print (Prelude.map (heterogeinity . concat) $ S.chunksOf 3 t)
-- | Heterogeneity over the concatenated texts, one score per chunk.
--
-- The abstracts returned by @getAbstract corpus_id limit@ are concatenated,
-- passed through 'cleanText', and cut into chunks of @x@ elements with
-- 'S.chunksOf'. Each chunk is counted with 'occurrences' and scored with
-- 'dicoStruct'; the scores are returned in chunk order.
--
-- The chunk size @x@ is handed to 'S.chunksOf' without validation.
heterogeinity' :: Int -> Int -> Int -> IO [Integer]
heterogeinity' corpus_id limit x = do
  abstracts <- getAbstract corpus_id limit
  let cleaned = cleanText (concat abstracts)
      chunks  = S.chunksOf x cleaned
  Prelude.mapM (dicoStruct . occurrences) chunks
-- | Per-chunk heterogeneity scores for a corpus.
--
-- Fetches abstracts with @getAbstract corpus_id limit@, concatenates them,
-- applies 'cleanText', splits the result into chunks of @size@ elements and
-- scores every chunk via 'occurrences' followed by 'dicoStruct'.
--
-- @size@ is passed to 'S.chunksOf' as is; it is not checked here.
heterogeinity'' :: Int -> Int -> Int -> IO [Integer]
heterogeinity'' corpus_id limit size =
  getAbstract corpus_id limit >>= Prelude.mapM chunkScore . chunked
  where
    -- Concatenate, clean, then cut into fixed-size chunks.
    chunked texts = S.chunksOf size (cleanText (concat texts))
    -- Score of one chunk: occurrence map reduced by 'dicoStruct'.
    chunkScore chunk = dicoStruct (occurrences chunk)
-- | Score of an occurrence map: the sum of all counts divided (integer
-- division) by the number of distinct keys.
--
-- An empty map yields @0@; dividing by its key count of zero would
-- otherwise raise a divide-by-zero exception when the result is forced.
--
-- The result is wrapped with 'return', so the function can be used in any
-- monad (the callers in this module use it in 'IO').
dicoStruct :: (Integral r, Monad m) => M.Map t r -> m r
dicoStruct dict_occ
  | M.null dict_occ = return 0
  | otherwise       = return $ total_occ `div` fromIntegral (M.size dict_occ)
  where
    -- Sum of the counts stored in the map.
    total_occ = sum (M.elems dict_occ)
-- | Heterogeneity of a single UCT (textual context unit).
--
-- The string is cleaned with 'cleanText' and counted with 'occurrences';
-- the resulting occurrence map is scored by 'dicoStruct', i.e. the total of
-- the counts divided (integer division) by the number of distinct keys.
heterogeinity :: [Char] -> IO Integer
heterogeinity string = dicoStruct . occurrences $ cleanText string
-- NOTE(review): the commented-out signature below is stale. The function now
-- yields a single 'Integer' (the result of 'heterogeinity'), not a triple.
-- Presumably the argument is still a @Column PGInt4@ -- confirm against
-- 'nodeHyperdataText' before enabling a signature.
--computeHeterogeinity
-- :: Fractional t =>
-- Opaleye.Internal.Column.Column Opaleye.PGTypes.PGInt4
-- -> IO (t, Integer, Integer)

-- | Heterogeneity score of a whole corpus.
--
-- Opens a connection with @PGS.connect infoGargandb@, reads the corpus texts
-- with @getText@ on @nodeHyperdataText corpus_id@, concatenates them and
-- scores the result with 'heterogeinity'.
--
-- The connection is released with 'PGS.close' even if the query throws;
-- it was previously never closed.
computeHeterogeinity corpus_id = do
  -- NOTE(review): assumes getText returns fully fetched rows, so the
  -- connection can be closed before the texts are consumed -- confirm.
  t <- E.bracket (PGS.connect infoGargandb) PGS.close $ \c ->
         getText c (nodeHyperdataText corpus_id)
  heterogeinity $ Prelude.concat t
-- | The 'computeHeterogeinity' actions for a fixed set of corpora, one per
-- entry and in the order listed below.
--
-- NOTE(review): the value is a list of actions, not an action: nothing is
-- executed until a caller sequences the elements. Presumably 'mapM' over the
-- ids was intended -- confirm before changing, since that alters the type.
main2 = Prelude.map (computeHeterogeinity . snd) corpus_ids
  where
    -- (label, corpus id) pairs. The trailing numbers are kept from the
    -- original source; presumably previously observed scores -- TODO confirm.
    corpus_ids =
      [ ("ALL", 272927)          -- 73
      , ("Histoire", 1387736)    -- 28
      , ("Sciences Po", 1296892) -- 37
      , ("Phylosophie", 1170004) -- 20
      , ("Psychologie", 1345852) -- 37
      , ("Sociologie", 1246452)  -- 42
      ]