Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
00dc93a0
Commit
00dc93a0
authored
Jan 08, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow REFACT] addin ExtractedNgrams type for simple or enriched ngrams extraction
parent
8f0fcd75
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
119 additions
and
62 deletions
+119
-62
Core.hs
src/Gargantext/Core.hs
+8
-4
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+43
-21
PosTagging.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+4
-10
Types.hs
src/Gargantext/Core/Types.hs
+6
-4
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+14
-12
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+4
-2
NgramsPostag.hs
src/Gargantext/Database/Query/Table/NgramsPostag.hs
+32
-3
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+7
-5
NgramsPostag.hs
src/Gargantext/Database/Schema/NgramsPostag.hs
+1
-1
No files found.
src/Gargantext/Core.hs
View file @
00dc93a0
...
@@ -12,11 +12,12 @@ Portability : POSIX
...
@@ -12,11 +12,12 @@ Portability : POSIX
module
Gargantext.Core
module
Gargantext.Core
where
where
import
Gargantext.Prelude
import
GHC.Generics
(
Generic
)
import
Data.Aeson
import
Data.Aeson
import
Data.Either
(
Either
(
Left
))
import
Data.Either
(
Either
(
Left
))
import
Data.Hashable
(
Hashable
)
import
Data.Swagger
import
Data.Swagger
import
GHC.Generics
(
Generic
)
import
Gargantext.Prelude
import
Servant.API
import
Servant.API
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -47,6 +48,8 @@ instance FromHttpApiData Lang
...
@@ -47,6 +48,8 @@ instance FromHttpApiData Lang
parseUrlPiece
"FR"
=
pure
FR
parseUrlPiece
"FR"
=
pure
FR
parseUrlPiece
"All"
=
pure
All
parseUrlPiece
"All"
=
pure
All
parseUrlPiece
_
=
Left
"Unexpected value of OrderBy"
parseUrlPiece
_
=
Left
"Unexpected value of OrderBy"
instance
Hashable
Lang
allLangs
::
[
Lang
]
allLangs
::
[
Lang
]
allLangs
=
[
minBound
..
]
allLangs
=
[
minBound
..
]
...
@@ -64,10 +67,11 @@ instance HasDBid Lang where
...
@@ -64,10 +67,11 @@ instance HasDBid Lang where
fromDBid
2
=
EN
fromDBid
2
=
EN
fromDBid
_
=
panic
"HasDBid lang, not implemented"
fromDBid
_
=
panic
"HasDBid lang, not implemented"
------------------------------------------------------------------------
------------------------------------------------------------------------
data
PostTagAlgo
=
CoreNLP
data
PostTagAlgo
=
CoreNLP
deriving
(
Show
,
Read
)
deriving
(
Show
,
Read
,
Eq
,
Ord
,
Generic
)
instance
Hashable
PostTagAlgo
instance
HasDBid
PostTagAlgo
where
instance
HasDBid
PostTagAlgo
where
toDBid
CoreNLP
=
1
toDBid
CoreNLP
=
1
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
00dc93a0
...
@@ -36,6 +36,7 @@ module Gargantext.Core.Text.Terms
...
@@ -36,6 +36,7 @@ module Gargantext.Core.Text.Terms
import
Control.Lens
import
Control.Lens
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Traversable
import
Data.Traversable
...
@@ -45,7 +46,6 @@ import qualified Data.List as List
...
@@ -45,7 +46,6 @@ import qualified Data.List as List
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
import
Gargantext.Core
import
Gargantext.Core
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Flow.Types
...
@@ -57,9 +57,11 @@ import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
...
@@ -57,9 +57,11 @@ import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import
Gargantext.Core.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Core.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
),
ngramsTerms
,
text2ngrams
)
import
Gargantext.Database.Query.Table.Ngrams
(
insertNgrams
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
NgramsPostag
(
..
),
insertNgramsPostag
)
import
Gargantext.Database.Schema.Ngrams
(
Ngrams
(
..
),
NgramsType
(
..
),
ngramsTerms
,
text2ngrams
,
NgramsId
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
data
TermType
lang
data
TermType
lang
=
Mono
{
_tt_lang
::
!
lang
}
=
Mono
{
_tt_lang
::
!
lang
}
...
@@ -111,26 +113,44 @@ withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
...
@@ -111,26 +113,44 @@ withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
withLang
l
_
=
l
withLang
l
_
=
l
------------------------------------------------------------------------
------------------------------------------------------------------------
data
ExtractedNgrams
=
SimpleNgrams
{
unSimpleNgrams
::
Ngrams
}
|
EnrichedNgrams
{
unEnrichedNgrams
::
NgramsPostag
}
deriving
(
Eq
,
Ord
,
Generic
)
instance
Hashable
ExtractedNgrams
class
ExtractNgramsT
h
class
ExtractNgramsT
h
where
where
extractNgramsT
::
HasText
h
extractNgramsT
::
HasText
h
=>
TermType
Lang
=>
TermType
Lang
->
h
->
h
->
Cmd
err
(
HashMap
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
------------------------------------------------------------------------
cleanExtractedNgrams
::
Int
->
ExtractedNgrams
->
ExtractedNgrams
cleanExtractedNgrams
s
(
SimpleNgrams
ng
)
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
SimpleNgrams
ng
|
otherwise
=
SimpleNgrams
$
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
))
cleanExtractedNgrams
s
_
=
undefined
extracted2ngrams
::
ExtractedNgrams
->
Ngrams
extracted2ngrams
(
SimpleNgrams
ng
)
=
ng
extracted2ngrams
_
=
undefined
filterNgrams
::
Int
->
HashMap
Ngrams
(
Map
NgramsType
Int
)
isSimpleNgrams
::
ExtractedNgrams
->
Bool
->
HashMap
Ngrams
(
Map
NgramsType
Int
)
isSimpleNgrams
(
SimpleNgrams
_
)
=
True
filterNgrams
s
=
HashMap
.
mapKeys
filter
isSimpleNgrams
_
=
False
where
filter
ng
|
Text
.
length
(
ng
^.
ngramsTerms
)
<
s
=
ng
|
otherwise
=
text2ngrams
(
Text
.
take
s
(
ng
^.
ngramsTerms
))
insertExtractedNgrams
::
[
ExtractedNgrams
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertExtractedNgrams
ngs
=
do
let
(
s
,
e
)
=
List
.
partition
isSimpleNgrams
ngs
m1
<-
insertNgrams
(
map
unSimpleNgrams
s
)
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
pure
$
m1
<>
m2
-- =======================================================
------------------------------------------------------------------------
-- | Terms from Text
-- | Terms from Text
-- Mono : mono terms
-- Mono : mono terms
-- Multi : multi terms
-- Multi : multi terms
...
@@ -147,15 +167,6 @@ terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (
...
@@ -147,15 +167,6 @@ terms (Unsupervised lang n s m) txt = termsUnsupervised (Unsupervised lang n s (
------------------------------------------------------------------------
------------------------------------------------------------------------
text2term
::
Lang
->
[
Text
]
->
Terms
text2term
_
[]
=
Terms
[]
Set
.
empty
text2term
lang
txt
=
Terms
txt
(
Set
.
fromList
$
map
(
stem
lang
)
txt
)
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;."
::
String
)
-- | Unsupervised ngrams extraction
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: remove IO
...
@@ -175,6 +186,8 @@ termsUnsupervised (Unsupervised l n s m) =
...
@@ -175,6 +186,8 @@ termsUnsupervised (Unsupervised l n s m) =
.
uniText
.
uniText
termsUnsupervised
_
=
undefined
termsUnsupervised
_
=
undefined
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
...
@@ -185,3 +198,12 @@ uniText = map (List.filter (not . isPunctuation))
...
@@ -185,3 +198,12 @@ uniText = map (List.filter (not . isPunctuation))
.
sentences
-- TODO get sentences according to lang
.
sentences
-- TODO get sentences according to lang
.
Text
.
toLower
.
Text
.
toLower
text2term
::
Lang
->
[
Text
]
->
Terms
text2term
_
[]
=
Terms
[]
Set
.
empty
text2term
lang
txt
=
Terms
txt
(
Set
.
fromList
$
map
(
stem
lang
)
txt
)
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;."
::
String
)
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
View file @
00dc93a0
...
@@ -25,25 +25,19 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
...
@@ -25,25 +25,19 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
module
Gargantext.Core.Text.Terms.Multi.PosTagging
module
Gargantext.Core.Text.Terms.Multi.PosTagging
where
where
import
GHC.Generics
import
Data.ByteString.Lazy.Internal
(
ByteString
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.ByteString.Lazy.Internal
(
ByteString
)
import
Data.Set
(
fromList
)
import
Data.Set
(
fromList
)
import
Data.String.Conversions
(
ConvertibleStrings
)
import
Data.Text
(
Text
,
splitOn
,
pack
,
toLower
)
import
Data.Text
(
Text
,
splitOn
,
pack
,
toLower
)
import
GHC.Generics
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Network.HTTP.Simple
import
Network.HTTP.Simple
import
Data.String.Conversions
(
ConvertibleStrings
)
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Token
=
Token
{
_tokenIndex
::
Int
data
Token
=
Token
{
_tokenIndex
::
Int
...
...
src/Gargantext/Core/Types.hs
View file @
00dc93a0
...
@@ -31,6 +31,7 @@ import Control.Lens (Prism', (#), makeLenses, over)
...
@@ -31,6 +31,7 @@ import Control.Lens (Prism', (#), makeLenses, over)
import
Control.Monad.Except
(
MonadError
(
throwError
))
import
Control.Monad.Except
(
MonadError
(
throwError
))
import
Data.Aeson
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Hashable
(
Hashable
)
import
Data.Maybe
import
Data.Maybe
import
Data.Monoid
import
Data.Monoid
import
Data.Semigroup
import
Data.Semigroup
...
@@ -40,11 +41,11 @@ import Data.Swagger (ToSchema(..))
...
@@ -40,11 +41,11 @@ import Data.Swagger (ToSchema(..))
import
Data.Text
(
Text
,
unpack
)
import
Data.Text
(
Text
,
unpack
)
import
Data.Validity
import
Data.Validity
import
GHC.Generics
import
GHC.Generics
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
Gargantext.Core.Types.Main
import
Gargantext.Core.Types.Main
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
wellNamedSchema
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
wellNamedSchema
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Ordering
=
Down
|
Up
data
Ordering
=
Down
|
Up
...
@@ -74,7 +75,7 @@ data POS = NP
...
@@ -74,7 +75,7 @@ data POS = NP
|
JJ
|
VB
|
JJ
|
VB
|
CC
|
IN
|
DT
|
CC
|
IN
|
DT
|
NoPos
|
NoPos
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
Ord
)
------------------------------------------------------------------------
------------------------------------------------------------------------
instance
FromJSON
POS
where
instance
FromJSON
POS
where
parseJSON
=
withText
"String"
(
\
x
->
pure
(
pos
$
unpack
x
))
parseJSON
=
withText
"String"
(
\
x
->
pure
(
pos
$
unpack
x
))
...
@@ -94,10 +95,11 @@ instance FromJSON POS where
...
@@ -94,10 +95,11 @@ instance FromJSON POS where
pos
"IN"
=
IN
pos
"IN"
=
IN
pos
"DT"
=
DT
pos
"DT"
=
DT
-- French specific
-- French specific
pos
"P"
=
IN
pos
"P"
=
IN
pos
_
=
NoPos
pos
_
=
NoPos
instance
ToJSON
POS
instance
ToJSON
POS
instance
Hashable
POS
------------------------------------------------------------------------
------------------------------------------------------------------------
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
data
NER
=
PERSON
|
ORGANIZATION
|
LOCATION
|
NoNER
deriving
(
Show
,
Generic
)
deriving
(
Show
,
Generic
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
00dc93a0
...
@@ -263,13 +263,15 @@ insertMasterDocs c lang hs = do
...
@@ -263,13 +263,15 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs
::
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
mapNgramsDocs
'
::
HashMap
Extracted
Ngrams
(
Map
NgramsType
(
Map
NodeId
Int
))
<-
mapNodeIdNgrams
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
documentsWithId
terms2id
<-
insertNgrams
$
HashMap
.
keys
mapNgramsDocs
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- to be removed
-- to be removed
let
indexedNgrams
=
HashMap
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
let
indexedNgrams
=
HashMap
.
mapKeys
(
indexNgrams
terms2id
)
mapNgramsDocs
...
@@ -372,28 +374,28 @@ mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap
...
@@ -372,28 +374,28 @@ mapNodeIdNgrams = HashMap.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap
------------------------------------------------------------------------
------------------------------------------------------------------------
instance
ExtractNgramsT
HyperdataContact
instance
ExtractNgramsT
HyperdataContact
where
where
extractNgramsT
l
hc
=
filterNgrams
255
<$>
extract
l
hc
extractNgramsT
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
where
where
extract
::
TermType
Lang
->
HyperdataContact
extract
::
TermType
Lang
->
HyperdataContact
->
Cmd
err
(
HashMap
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
Extracted
Ngrams
(
Map
NgramsType
Int
))
extract
_l
hc'
=
do
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
HashMap
.
fromList
$
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
instance
ExtractNgramsT
HyperdataDocument
instance
ExtractNgramsT
HyperdataDocument
where
where
extractNgramsT
::
TermType
Lang
extractNgramsT
::
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
Cmd
err
(
HashMap
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
Extracted
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT
lang
hd
=
filterNgrams
255
<$>
extractNgramsT'
lang
hd
extractNgramsT
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
lang
hd
where
where
extractNgramsT'
::
TermType
Lang
extractNgramsT'
::
TermType
Lang
->
HyperdataDocument
->
HyperdataDocument
->
Cmd
err
(
HashMap
Ngrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
Extracted
Ngrams
(
Map
NgramsType
Int
))
extractNgramsT'
lang'
doc
=
do
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
$
maybe
"Nothing"
identity
...
@@ -412,10 +414,10 @@ instance ExtractNgramsT HyperdataDocument
...
@@ -412,10 +414,10 @@ instance ExtractNgramsT HyperdataDocument
<$>
concat
<$>
concat
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
pure
$
HashMap
.
fromList
$
[(
source
,
Map
.
singleton
Sources
1
)]
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
Map
.
singleton
Sources
1
)]
<>
[(
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
<>
[(
SimpleNgrams
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
terms'
]
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
where
where
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
00dc93a0
...
@@ -22,10 +22,12 @@ module Gargantext.Database.Query.Table.Ngrams
...
@@ -22,10 +22,12 @@ module Gargantext.Database.Query.Table.Ngrams
where
where
import
Control.Lens
((
^.
))
import
Control.Lens
((
^.
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.Map
(
Map
,
fromList
)
import
Data.Map
(
Map
,
fromList
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
)
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
)
...
@@ -64,8 +66,8 @@ _dbGetNgramsDb = runOpaQuery queryNgramsTable
...
@@ -64,8 +66,8 @@ _dbGetNgramsDb = runOpaQuery queryNgramsTable
-- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
-- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
insertNgrams
::
[
Ngrams
]
->
Cmd
err
(
Map
Text
NgramsId
)
insertNgrams
::
[
Ngrams
]
->
Cmd
err
(
Hash
Map
Text
NgramsId
)
insertNgrams
ns
=
fromList
<$>
map
(
\
(
Indexed
i
t
)
->
(
t
,
i
))
<$>
(
insertNgrams'
ns
)
insertNgrams
ns
=
HashMap
.
fromList
<$>
map
(
\
(
Indexed
i
t
)
->
(
t
,
i
))
<$>
(
insertNgrams'
ns
)
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
insertNgrams'
::
[
Ngrams
]
->
Cmd
err
[
Indexed
Int
Text
]
insertNgrams'
::
[
Ngrams
]
->
Cmd
err
[
Indexed
Int
Text
]
...
...
src/Gargantext/Database/Query/Table/NgramsPostag.hs
View file @
00dc93a0
...
@@ -16,16 +16,30 @@ Portability : POSIX
...
@@ -16,16 +16,30 @@ Portability : POSIX
module
Gargantext.Database.Query.Table.NgramsPostag
module
Gargantext.Database.Query.Table.NgramsPostag
where
where
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Types
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Database.PostgreSQL.Simple
as
PGS
data
NgramsPostag
=
NgramsPostag
{
_np_lang
::
Lang
,
_np_algo
::
PostTagAlgo
,
_np_postag
::
POS
,
_np_form
::
Ngrams
,
_np_lem
::
Ngrams
}
deriving
(
Eq
,
Ord
,
Generic
)
instance
Hashable
NgramsPostag
type
NgramsPostagInsert
=
(
Int
type
NgramsPostagInsert
=
(
Int
,
Int
,
Int
...
@@ -36,9 +50,24 @@ type NgramsPostagInsert = ( Int
...
@@ -36,9 +50,24 @@ type NgramsPostagInsert = ( Int
,
Int
,
Int
)
)
toInsert
::
NgramsPostag
->
NgramsPostagInsert
insertNgramsPostag
::
[
NgramsPostagInsert
]
->
Cmd
err
[
Indexed
Int
Text
]
toInsert
(
NgramsPostag
l
a
p
form
lem
)
=
insertNgramsPostag
ns
=
runPGSQuery
queryInsertNgramsPostag
(
PGS
.
Only
$
Values
fields
ns
)
(
toDBid
l
,
toDBid
a
,
cs
$
show
p
,
_ngramsTerms
form
,
_ngramsSize
form
,
_ngramsTerms
lem
,
_ngramsSize
lem
)
insertNgramsPostag
::
[
NgramsPostag
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertNgramsPostag
ns
=
HashMap
.
fromList
<$>
map
(
\
(
Indexed
t
i
)
->
(
t
,
i
))
<$>
insertNgramsPostag'
(
map
toInsert
ns
)
insertNgramsPostag'
::
[
NgramsPostagInsert
]
->
Cmd
err
[
Indexed
Text
Int
]
insertNgramsPostag'
ns
=
runPGSQuery
queryInsertNgramsPostag
(
PGS
.
Only
$
Values
fields
ns
)
where
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
$
snd
fields_name
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
$
snd
fields_name
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
00dc93a0
...
@@ -19,6 +19,7 @@ Ngrams connection to the Database.
...
@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module
Gargantext.Database.Schema.Ngrams
module
Gargantext.Database.Schema.Ngrams
where
where
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.Hashable
(
Hashable
)
import
Codec.Serialise
(
Serialise
())
import
Codec.Serialise
(
Serialise
())
import
Control.Lens
(
over
)
import
Control.Lens
(
over
)
...
@@ -35,6 +36,7 @@ import Text.Read (read)
...
@@ -35,6 +36,7 @@ import Text.Read (read)
import
Gargantext.Database.Types
import
Gargantext.Database.Types
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Prelude
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
qualified
Data.HashMap.Strict
as
HashMap
type
NgramsId
=
Int
type
NgramsId
=
Int
...
@@ -178,19 +180,19 @@ instance Functor NgramsT where
...
@@ -178,19 +180,19 @@ instance Functor NgramsT where
fmap
=
over
ngramsT
fmap
=
over
ngramsT
-----------------------------------------------------------------------
-----------------------------------------------------------------------
withMap
::
Map
Text
NgramsId
->
Text
->
NgramsId
withMap
::
Hash
Map
Text
NgramsId
->
Text
->
NgramsId
withMap
m
n
=
maybe
(
panic
"withMap: should not happen"
)
identity
(
lookup
n
m
)
withMap
m
n
=
maybe
(
panic
"withMap: should not happen"
)
identity
(
HashMap
.
lookup
n
m
)
indexNgramsT
::
Map
Text
NgramsId
->
NgramsT
Ngrams
->
NgramsT
(
Indexed
Int
Ngrams
)
indexNgramsT
::
Hash
Map
Text
NgramsId
->
NgramsT
Ngrams
->
NgramsT
(
Indexed
Int
Ngrams
)
indexNgramsT
=
fmap
.
indexNgramsWith
.
withMap
indexNgramsT
=
fmap
.
indexNgramsWith
.
withMap
-- | TODO replace NgramsT whith Typed NgramsType Ngrams
-- | TODO replace NgramsT whith Typed NgramsType Ngrams
indexTypedNgrams
::
Map
Text
NgramsId
indexTypedNgrams
::
Hash
Map
Text
NgramsId
->
Typed
NgramsType
Ngrams
->
Typed
NgramsType
Ngrams
->
Typed
NgramsType
(
Indexed
Int
Ngrams
)
->
Typed
NgramsType
(
Indexed
Int
Ngrams
)
indexTypedNgrams
=
fmap
.
indexNgramsWith
.
withMap
indexTypedNgrams
=
fmap
.
indexNgramsWith
.
withMap
indexNgrams
::
Map
Text
NgramsId
->
Ngrams
->
Indexed
Int
Ngrams
indexNgrams
::
Hash
Map
Text
NgramsId
->
Ngrams
->
Indexed
Int
Ngrams
indexNgrams
=
indexNgramsWith
.
withMap
indexNgrams
=
indexNgramsWith
.
withMap
indexNgramsWith
::
(
Text
->
NgramsId
)
->
Ngrams
->
Indexed
Int
Ngrams
indexNgramsWith
::
(
Text
->
NgramsId
)
->
Ngrams
->
Indexed
Int
Ngrams
...
...
src/Gargantext/Database/Schema/NgramsPostag.hs
View file @
00dc93a0
...
@@ -49,7 +49,7 @@ data PosTag = PosTag { unPosTag :: Text }
...
@@ -49,7 +49,7 @@ data PosTag = PosTag { unPosTag :: Text }
|
NER
{
unNER
::
Text
}
-- TODO
|
NER
{
unNER
::
Text
}
-- TODO
------------------------------------------------------------------------
------------------------------------------------------------------------
type
NgramsPostag
=
NgramsPostagPoly
(
Maybe
Int
)
Lang
PostTagAlgo
(
Maybe
PosTag
)
NgramsTerm
NgramsTerm
(
Maybe
Int
)
--
type NgramsPostag = NgramsPostagPoly (Maybe Int) Lang PostTagAlgo (Maybe PosTag) NgramsTerm NgramsTerm (Maybe Int)
type
NgramsPostagDB
=
NgramsPostagPoly
(
Maybe
Int
)
Int
Int
(
Maybe
Text
)
Int
Int
Int
type
NgramsPostagDB
=
NgramsPostagPoly
(
Maybe
Int
)
Int
Int
(
Maybe
Text
)
Int
Int
Int
------------------------------------------------------------------------
------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment