1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{-|
Module : Gargantext.Core.Text.Ngrams
Description : Main Ngrams types
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Text.Ngrams
where
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Data.Aeson ( FromJSONKey(..), FromJSONKeyFunction(..), Value(String), ToJSONKey(..) )
import Data.Aeson.Types (toJSONKeyText)
import Data.Text (pack)
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Schema.Prelude hiding (over)
import Gargantext.Prelude
import Servant (FromHttpApiData(..), ToHttpApiData(..))
import Test.QuickCheck (elements)
import Text.Read (read)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
where
parseJSON (String "Authors") = pure Authors
parseJSON (String "Institutes") = pure Institutes
parseJSON (String "Sources") = pure Sources
parseJSON (String "Terms") = pure NgramsTerms
parseJSON (String "NgramsTerms") = pure NgramsTerms
parseJSON _ = mzero
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
where
toJSON Authors = String "Authors"
toJSON Institutes = String "Institutes"
toJSON Sources = String "Sources"
toJSON NgramsTerms = String "Terms"
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance Arbitrary NgramsType where
arbitrary = elements [ minBound .. maxBound ]
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
instance ToSchema NgramsType
{- where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
--}
data Ngrams = UnsafeNgrams { _ngramsTerms :: Text
, _ngramsSize :: Int
}
deriving (Generic, Show, Eq, Ord)
instance Hashable Ngrams
makeLenses ''Ngrams
instance PGS.ToRow Ngrams where
toRow (UnsafeNgrams { .. }) = [ toField _ngramsTerms
, toField _ngramsSize ]
------------------------------------------------------------------------
-------------------------------------------------------------------------
-- Named entity are typed ngrams of Terms Ngrams
data NgramsT a =
NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic, Show, Eq, Ord)
makeLenses ''NgramsT
instance Functor NgramsT where
fmap = over ngramsT