Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
da954a33
Commit
da954a33
authored
Nov 24, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/145-dev-graph-explorer-search-tfidf' into dev
parents
23d2a12f
00f726d1
Changes
37
Show whitespace changes
Inline
Side-by-side
Showing
37 changed files
with
308 additions
and
161 deletions
+308
-161
README.md
README.md
+2
-0
Main.hs
bin/gargantext-upgrade/Main.hs
+1
-2
docker-compose.yaml
devops/docker/docker-compose.yaml
+1
-1
schema.sql
devops/postgres/schema.sql
+1
-0
0.0.6.7.3.sql
devops/postgres/upgrade/0.0.6.7.3.sql
+2
-0
gargantext.cabal
gargantext.cabal
+4
-3
package.yaml
package.yaml
+2
-0
Text.hs
src-test/Core/Text.hs
+0
-1
Utils.hs
src-test/Core/Utils.hs
+28
-0
Clustering.hs
src-test/Graph/Clustering.hs
+0
-2
Main.hs
src-test/Main.hs
+4
-1
Crypto.hs
src-test/Utils/Crypto.hs
+1
-3
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+4
-4
List.hs
src/Gargantext/API/Ngrams/List.hs
+10
-9
Tools.hs
src/Gargantext/API/Ngrams/Tools.hs
+2
-2
Search.hs
src/Gargantext/API/Search.hs
+10
-7
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+5
-5
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+29
-31
Eleve.hs
src/Gargantext/Core/Text/Terms/Eleve.hs
+3
-0
Mono.hs
src/Gargantext/Core/Text/Terms/Mono.hs
+2
-2
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+11
-8
Group.hs
src/Gargantext/Core/Text/Terms/Multi/Group.hs
+0
-1
En.hs
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
+0
-1
PosTagging.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+11
-7
Types.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
+0
-1
WithList.hs
src/Gargantext/Core/Text/Terms/WithList.hs
+7
-5
Types.hs
src/Gargantext/Core/Types.hs
+6
-5
Utils.hs
src/Gargantext/Core/Utils.hs
+19
-2
LegacyMain.hs
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
+1
-2
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+38
-32
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+10
-9
Search.hs
src/Gargantext/Database/Action/Search.hs
+76
-2
Facet.hs
src/Gargantext/Database/Query/Facet.hs
+2
-2
Join.hs
src/Gargantext/Database/Query/Join.hs
+3
-4
ContextNodeNgrams.hs
src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
+2
-1
UpdateOpaleye.hs
src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
+3
-4
ContextNodeNgrams.hs
src/Gargantext/Database/Schema/ContextNodeNgrams.hs
+8
-2
No files found.
README.md
View file @
da954a33
...
...
@@ -196,6 +196,8 @@ To build documentation, run:
stack
--docker
build
--haddock
--no-haddock-deps
--fast
```
(in
`.stack-work/dist/x86_64-linux-nix/Cabal-3.2.1.0/doc/html/gargantext`
).
## GraphQL
Some introspection information.
...
...
bin/gargantext-upgrade/Main.hs
View file @
da954a33
...
...
@@ -33,7 +33,6 @@ import Gargantext.Database.Admin.Trigger.Init
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
NodeDocument
,
NodeContact
))
import
Gargantext.Database.Prelude
(
Cmd
''
,
Cmd
,
execPGSQuery
)
import
Gargantext.Database.Query.Table.Node
(
getOrMkList
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
(
GargConfig
(
..
),
readConfig
)
import
Prelude
(
getLine
)
...
...
@@ -67,7 +66,7 @@ main = do
_ok
<-
getLine
cfg
<-
readConfig
iniPath
let
secret
=
_gc_secretkey
cfg
let
_
secret
=
_gc_secretkey
cfg
withDevEnv
iniPath
$
\
env
->
do
-- First upgrade the Database Schema
...
...
devops/docker/docker-compose.yaml
View file @
da954a33
...
...
@@ -25,7 +25,7 @@ services:
network_mode
:
host
#command: ["postgres", "-c", "log_statement=all"]
#ports:
#
- 5432:5432
#
- 5432:5432
environment
:
POSTGRES_USER
:
gargantua
POSTGRES_PASSWORD
:
C8kdcUrAQy66U
...
...
devops/postgres/schema.sql
View file @
da954a33
...
...
@@ -164,6 +164,7 @@ CREATE TABLE public.context_node_ngrams (
ngrams_id
INTEGER
NOT
NULL
REFERENCES
public
.
ngrams
(
id
)
ON
DELETE
CASCADE
,
ngrams_type
INTEGER
,
weight
double
precision
,
doc_count
INTEGER
DEFAULT
0
,
PRIMARY
KEY
(
context_id
,
node_id
,
ngrams_id
,
ngrams_type
)
);
ALTER
TABLE
public
.
context_node_ngrams
OWNER
TO
gargantua
;
...
...
devops/postgres/upgrade/0.0.6.7.3.sql
0 → 100644
View file @
da954a33
ALTER
TABLE
context_node_ngrams
ADD
COLUMN
doc_count
INTEGER
DEFAULT
0
;
gargantext.cabal
View file @
da954a33
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.3
5.0
.
-- This file has been generated from package.yaml by hpack version 0.3
4.7
.
--
-- see: https://github.com/sol/hpack
...
...
@@ -50,6 +50,7 @@ library
Gargantext.Core.Types
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix
Gargantext.Utils.Jobs
Gargantext.Utils.Jobs.API
...
...
@@ -87,6 +88,7 @@ library
Gargantext.Core.Text.Prepare
Gargantext.Core.Text.Search
Gargantext.Core.Text.Terms
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr
...
...
@@ -211,7 +213,6 @@ library
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.SP
Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Stem.En
Gargantext.Core.Text.Terms.Mono.Token
...
...
@@ -221,7 +222,6 @@ library
Gargantext.Core.Text.Terms.Multi.PosTagging
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Types.Phylo
Gargantext.Core.Utils
Gargantext.Core.Utils.DateUtils
Gargantext.Core.Viz
Gargantext.Core.Viz.Chart
...
...
@@ -809,6 +809,7 @@ test-suite garg-test
Core.Text
Core.Text.Examples
Core.Text.Flow
Core.Utils
Graph.Clustering
Graph.Distance
Ngrams.Lang
...
...
package.yaml
View file @
da954a33
...
...
@@ -74,6 +74,7 @@ library:
-
Gargantext.Core.Types
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Utils
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Utils.Jobs
-
Gargantext.Utils.Jobs.API
...
...
@@ -111,6 +112,7 @@ library:
-
Gargantext.Core.Text.Prepare
-
Gargantext.Core.Text.Search
-
Gargantext.Core.Text.Terms
-
Gargantext.Core.Text.Terms.Eleve
-
Gargantext.Core.Text.Terms.Mono
-
Gargantext.Core.Text.Terms.Multi.Lang.En
-
Gargantext.Core.Text.Terms.Multi.Lang.Fr
...
...
src-test/Core/Text.hs
View file @
da954a33
{-|
Module : Graph.Clustering
Description : Basic tests to avoid quick regression
...
...
src-test/Core/Utils.hs
0 → 100644
View file @
da954a33
{-|
Module : Core.Utils
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Core.Utils
where
import
Test.Hspec
import
Gargantext.Prelude
import
Gargantext.Core.Utils
-- | Core.Utils tests
test
::
IO
()
test
=
hspec
$
do
describe
"check if groupWithCounts works"
$
do
it
"simple integer array"
$
do
(
groupWithCounts
[
1
,
2
,
3
,
1
,
2
,
3
])
`
shouldBe
`
[(
1
,
2
),
(
2
,
2
),
(
3
,
2
)]
it
"string"
$
do
(
groupWithCounts
"abccba"
)
`
shouldBe
`
[(
'a'
,
2
),
(
'b'
,
2
),
(
'c'
,
2
)]
src-test/Graph/Clustering.hs
View file @
da954a33
{-|
Module : Graph.Clustering
Description : Basic tests to avoid quick regression
...
...
@@ -40,4 +39,3 @@ test = hspec $ do
let
result
=
List
.
length
partitions
>
1
shouldBe
True
result
src-test/Main.hs
View file @
da954a33
...
...
@@ -11,6 +11,8 @@ Portability : POSIX
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Core.Utils
as
Utils
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang
import
qualified
Ngrams.Lang.Occurrences
as
Occ
...
...
@@ -22,6 +24,7 @@ import qualified Utils.Crypto as Crypto
main
::
IO
()
main
=
do
Utils
.
test
-- Occ.parsersTest
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN
...
...
src-test/Utils/Crypto.hs
View file @
da954a33
{-|
Module : Utils.Crypto
Description :
...
...
@@ -43,4 +42,3 @@ test = hspec $ do
let
hash2
=
hash
([
"b"
,
"a"
]
::
[
Text
])
it
"compare"
$
do
hash1
`
shouldBe
`
hash2
src/Gargantext/API/Ngrams.hs
View file @
da954a33
...
...
@@ -187,9 +187,9 @@ saveNodeStory :: ( MonadReader env m, MonadBase IO m, HasNodeStorySaver env )
saveNodeStory
=
do
saver
<-
view
hasNodeStorySaver
liftBase
$
do
Gargantext
.
Prelude
.
putStrLn
"---- Running node story saver ----"
--
Gargantext.Prelude.putStrLn "---- Running node story saver ----"
saver
Gargantext
.
Prelude
.
putStrLn
"---- Node story saver finished ----"
--
Gargantext.Prelude.putStrLn "---- Node story saver finished ----"
saveNodeStoryImmediate
::
(
MonadReader
env
m
,
MonadBase
IO
m
,
HasNodeStoryImmediateSaver
env
)
...
...
@@ -197,9 +197,9 @@ saveNodeStoryImmediate :: ( MonadReader env m, MonadBase IO m, HasNodeStoryImmed
saveNodeStoryImmediate
=
do
saver
<-
view
hasNodeStoryImmediateSaver
liftBase
$
do
Gargantext
.
Prelude
.
putStrLn
"---- Running node story immediate saver ----"
--
Gargantext.Prelude.putStrLn "---- Running node story immediate saver ----"
saver
Gargantext
.
Prelude
.
putStrLn
"---- Node story immediate saver finished ----"
--
Gargantext.Prelude.putStrLn "---- Node story immediate saver finished ----"
listTypeConflictResolution
::
ListType
->
ListType
->
ListType
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
da954a33
...
...
@@ -158,8 +158,9 @@ reIndexWith cId lId nt lts = do
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
map
(
HashMap
.
fromListWith
(
<>
))
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
-- fromListWith (<>)
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
((
k
,
cnt
),
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
over
(
traverse
.
traverse
)
(
\
p
->
(
p
,
cnt
))
v
)))
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
...
...
src/Gargantext/API/Ngrams/Tools.hs
View file @
da954a33
src/Gargantext/API/Search.hs
View file @
da954a33
...
...
@@ -18,6 +18,7 @@ module Gargantext.API.Search
where
import
Data.Aeson
hiding
(
defaultTaggedObject
)
import
Data.List
(
concat
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Swagger
hiding
(
fieldLabelModifier
,
Contact
)
import
Data.Text
(
Text
)
...
...
@@ -55,7 +56,7 @@ api :: NodeId -> GargServer (API SearchResult)
api
nId
(
SearchQuery
q
SearchDoc
)
o
l
order
=
SearchResult
<$>
SearchResultDoc
<$>
map
(
toRow
nId
)
<$>
searchInCorpus
nId
False
q
o
l
order
<$>
searchInCorpus
nId
False
(
concat
q
)
o
l
order
api
nId
(
SearchQuery
q
SearchContact
)
o
l
order
=
do
printDebug
"isPairedWith"
nId
...
...
@@ -67,13 +68,15 @@ api nId (SearchQuery q SearchContact) o l order = do
Just
aId
->
SearchResult
<$>
SearchResultContact
<$>
map
(
toRow
aId
)
<$>
searchInCorpusWithContacts
nId
aId
q
o
l
order
<$>
searchInCorpusWithContacts
nId
aId
(
concat
q
)
o
l
order
api
_nId
(
SearchQuery
_q
SearchDocWithNgrams
)
_o
_l
_order
=
undefined
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- | Main Types
-----------------------------------------------------------------------
data
SearchType
=
SearchDoc
|
SearchContact
data
SearchType
=
SearchDoc
|
SearchContact
|
SearchDocWithNgrams
deriving
(
Generic
)
instance
FromJSON
SearchType
where
parseJSON
=
genericParseJSON
(
defaultOptions
{
sumEncoding
=
ObjectWithSingleField
})
...
...
@@ -85,7 +88,7 @@ instance Arbitrary SearchType where
-----------------------------------------------------------------------
data
SearchQuery
=
SearchQuery
{
query
::
!
[
Text
]
SearchQuery
{
query
::
!
[
[
Text
]
]
,
expected
::
!
SearchType
}
deriving
(
Generic
)
...
...
@@ -100,7 +103,7 @@ instance ToSchema SearchQuery
-}
instance
Arbitrary
SearchQuery
where
arbitrary
=
elements
[
SearchQuery
[
"electrodes"
]
SearchDoc
]
arbitrary
=
elements
[
SearchQuery
[
[
"electrodes"
]
]
SearchDoc
]
-- arbitrary = elements [SearchQuery "electrodes" 1 ] --SearchDoc]
-----------------------------------------------------------------------
data
SearchResult
=
...
...
src/Gargantext/Core/NodeStory.hs
View file @
da954a33
...
...
@@ -505,22 +505,22 @@ updateNodeStory c nodeId@(NodeId _nId) currentArchive newArchive = do
--printDebug "[updateNodeStory] updates" $ Text.unlines $ (Text.pack . show) <$> updates
-- 2. Perform inserts/deletes/updates
printDebug
"[updateNodeStory] applying insert"
()
--
printDebug "[updateNodeStory] applying insert" ()
insertArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
inserts
}
printDebug
"[updateNodeStory] insert applied"
()
--
printDebug "[updateNodeStory] insert applied" ()
--TODO Use currentArchive ^. a_version in delete and report error
-- if entries with (node_id, ngrams_type_id, ngrams_id) but
-- different version are found.
deleteArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
deletes
}
printDebug
"[updateNodeStory] delete applied"
()
--
printDebug "[updateNodeStory] delete applied" ()
updateArchiveList
c
nodeId
$
Archive
{
_a_version
=
newArchive
^.
a_version
,
_a_history
=
[]
,
_a_state
=
archiveStateFromList
updates
}
printDebug
"[updateNodeStory] update applied"
()
--
printDebug "[updateNodeStory] update applied" ()
pure
()
-- where
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
da954a33
...
...
@@ -28,8 +28,9 @@ compute graph
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstrainedClassMethods #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Core.Text.Terms
where
...
...
@@ -47,6 +48,7 @@ import qualified Data.Set as Set
import
qualified
Data.Text
as
Text
import
qualified
Data.HashMap.Strict
as
HashMap
import
Gargantext.Core
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Text
(
sentences
,
HasText
(
..
))
import
Gargantext.Core.Text.Terms.Eleve
(
mainEleveWith
,
Tries
,
Token
,
buildTries
,
toToken
)
import
Gargantext.Core.Text.Terms.Mono
(
monoTerms
)
...
...
@@ -70,6 +72,7 @@ data TermType lang
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
deriving
(
Generic
)
deriving
instance
(
Show
lang
)
=>
Show
(
TermType
lang
)
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
...
...
@@ -78,16 +81,14 @@ makeLenses ''TermType
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hidd
eng mapM
from end user).
-- | Sugar to extract terms from text (hidd
ing 'mapM'
from end user).
--extractTerms :: Traversable t => TermType Lang -> t Text -> IO (t [Terms])
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
Terms
]]
extractTerms
::
TermType
Lang
->
[
Text
]
->
IO
[[
TermsWithCount
]]
extractTerms
(
Unsupervised
{
..
})
xs
=
mapM
(
terms
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
}))
xs
where
m'
=
case
_tt_model
of
Just
m''
->
m''
Nothing
->
newTries
_tt_windowSize
(
Text
.
intercalate
" "
xs
)
extractTerms
termTypeLang
xs
=
mapM
(
terms
termTypeLang
)
xs
...
...
@@ -116,12 +117,13 @@ data ExtractedNgrams = SimpleNgrams { unSimpleNgrams :: Ngrams }
instance
Hashable
ExtractedNgrams
-- | A typeclass that represents extracting ngrams from an entity.
class
ExtractNgramsT
h
where
extractNgramsT
::
HasText
h
=>
TermType
Lang
->
h
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
------------------------------------------------------------------------
enrichedTerms
::
Lang
->
PosTagAlgo
->
POS
->
Terms
->
NgramsPostag
enrichedTerms
l
pa
po
(
Terms
ng1
ng2
)
=
...
...
@@ -163,43 +165,41 @@ isSimpleNgrams (SimpleNgrams _) = True
isSimpleNgrams
_
=
False
------------------------------------------------------------------------
-- | Terms from
Text
--
Mono
: mono terms
--
Multi
: multi terms
--
MonoMulti
: mono and multi
-- | Terms from
'Text'
--
'Mono'
: mono terms
--
'Multi'
: multi terms
--
'MonoMulti'
: mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms
::
TermType
Lang
->
Text
->
IO
[
Terms
]
terms
::
TermType
Lang
->
Text
->
IO
[
Terms
WithCount
]
terms
(
Mono
lang
)
txt
=
pure
$
monoTerms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
Unsupervised
{
..
})
txt
=
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
})
txt
terms
(
Unsupervised
{
..
})
txt
=
pure
$
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
m'
,
..
})
txt
where
m'
=
maybe
(
newTries
_tt_ngramsSize
txt
)
identity
_tt_model
-- terms (WithList list) txt = pure . concat $ extractTermsWithList list txt
------------------------------------------------------------------------
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: remove IO
-- TODO: newtype BlockText
type
WindowSize
=
Int
type
MinNgramSize
=
Int
termsUnsupervised
::
TermType
Lang
->
Text
->
IO
[
Terms
]
termsUnsupervised
(
Unsupervised
l
n
s
m
)
=
pure
.
map
(
text2term
l
)
.
List
.
nub
.
(
List
.
filter
(
\
l'
->
List
.
length
l'
>=
s
))
-- | Unsupervised ngrams extraction
-- language agnostic extraction
-- TODO: newtype BlockText
termsUnsupervised
::
TermType
Lang
->
Text
->
[
TermsWithCount
]
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Nothing
})
=
panic
"[termsUnsupervised] no model"
termsUnsupervised
(
Unsupervised
{
_tt_model
=
Just
_tt_model
,
..
})
=
map
(
\
(
t
,
cnt
)
->
(
text2term
_tt_lang
t
,
cnt
))
.
groupWithCounts
-- . List.nub
.
(
List
.
filter
(
\
l'
->
List
.
length
l'
>=
_tt_windowSize
))
.
List
.
concat
.
mainEleveWith
(
maybe
(
panic
"no model"
)
identity
m
)
n
.
mainEleveWith
_tt_model
_tt_ngramsSize
.
uniText
termsUnsupervised
_
=
undefined
newTries
::
Int
->
Text
->
Tries
Token
()
newTries
n
t
=
buildTries
n
(
fmap
toToken
$
uniText
t
)
...
...
@@ -217,5 +217,3 @@ text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt)
isPunctuation
::
Text
->
Bool
isPunctuation
x
=
List
.
elem
x
$
(
Text
.
pack
.
pure
)
<$>
(
"!?(),;.:"
::
String
)
src/Gargantext/Core/Text/Terms/Eleve.hs
View file @
da954a33
...
...
@@ -32,6 +32,7 @@ Notes for current implementation:
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
...
...
@@ -278,6 +279,8 @@ data Tries k e = Tries
makeLenses
''
T
ries
deriving
instance
(
Show
k
,
Show
e
)
=>
Show
(
Tries
k
e
)
buildTries
::
Int
->
[[
Token
]]
->
Tries
Token
()
buildTries
n
sentences
=
Tries
{
_fwd
=
buildTrie
Forward
n
sentences
...
...
src/Gargantext/Core/Text/Terms/Mono.hs
View file @
da954a33
...
...
@@ -40,8 +40,8 @@ words = monoTexts
isSep
::
Char
->
Bool
isSep
=
(`
elem
`
(
",.:;?!(){}[]
\"\'
"
::
String
))
monoTerms
::
Lang
->
Text
->
[
Terms
]
monoTerms
l
txt
=
map
(
monoText2term
l
)
$
monoTexts
txt
monoTerms
::
Lang
->
Text
->
[
Terms
WithCount
]
monoTerms
l
txt
=
map
(
\
t
->
(
monoText2term
l
t
,
1
)
)
$
monoTexts
txt
monoTexts
::
Text
->
[
Text
]
monoTexts
=
L
.
concat
.
monoTextsBySentence
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
da954a33
...
...
@@ -21,6 +21,7 @@ import Data.List (concat)
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Gargantext.Core.Text.Terms.Multi.PosTagging
import
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
...
...
@@ -37,14 +38,16 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
-------------------------------------------------------------------
multiterms
::
Lang
->
Text
->
IO
[
Terms
]
multiterms
=
multiterms'
tokenTag2terms
multiterms
::
Lang
->
Text
->
IO
[
TermsWithCount
]
multiterms
l
txt
=
do
ret
<-
multiterms'
tokenTag2terms
l
txt
pure
$
groupWithCounts
ret
where
multiterms'
::
(
TokenTag
->
a
)
->
Lang
->
Text
->
IO
[
a
]
multiterms'
f
lang
txt
=
concat
multiterms'
f
lang
txt
'
=
concat
<$>
map
(
map
f
)
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
tokenTags
lang
txt
<$>
tokenTags
lang
txt
'
-------------------------------------------------------------------
tokenTag2terms
::
TokenTag
->
Terms
...
...
src/Gargantext/Core/Text/Terms/Multi/Group.hs
View file @
da954a33
...
...
@@ -30,4 +30,3 @@ group2 p1 p2 (x@(TokenTag _ _ Nothing _):y) = (x: group2 p1 p2 y)
group2
_
_
[
x
@
(
TokenTag
_
_
(
Just
_
)
_
)]
=
[
x
]
group2
p1
p2
(
x
@
(
TokenTag
_
_
(
Just
_
)
_
)
:
y
@
(
TokenTag
_
_
Nothing
_
)
:
z
)
=
(
x
:
y
:
group2
p1
p2
(
y
:
z
))
group2
_
_
[]
=
[]
src/Gargantext/Core/Text/Terms/Multi/Lang/En.hs
View file @
da954a33
...
...
@@ -40,4 +40,3 @@ groupTokens ntags = group2 NP NP
--groupNgrams ((x,_,"LOCATION"):(y,yy,"LOCATION"):xs) = groupNgrams ((x <> " " <> y,yy,"LOCATION"):xs)
--
--groupNgrams (x:xs) = (x:(groupNgrams xs))
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
View file @
da954a33
...
...
@@ -45,16 +45,19 @@ tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags
ts
=
filter'
$
map
tokenTag
ts
------------------------------------------------------------------------
tokenTag
::
Token
->
TokenTag
tokenTag
(
Token
_
w
_
l
_
_
p
n
_
_
)
=
TokenTag
w'
l'
p
n
tokenTag
(
Token
{
..
})
=
TokenTag
{
_my_token_word
=
w'
,
_my_token_lemma
=
l'
,
_my_token_pos
=
_tokenPos
,
_my_token_ner
=
_tokenNer
}
where
w'
=
split
w
l'
=
fromList
(
split
l
)
w'
=
split
_tokenWord
l'
=
fromList
(
split
_tokenLemma
)
split
=
splitOn
(
pack
" "
)
.
toLower
filter'
::
[
TokenTag
]
->
[
TokenTag
]
filter'
xs
=
filter
isNgrams
xs
where
isNgrams
(
TokenTag
_
_
p
n
)
=
isJust
p
||
isJust
n
isNgrams
(
TokenTag
{
..
})
=
isJust
_my_token_pos
||
isJust
_my_token_ner
------------------------------------------------------------------------
-- request =
...
...
@@ -80,6 +83,7 @@ corenlp' lang txt = do
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,lemma,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
-- curl -XPOST 'http://localhost:9000/?properties=%7B%22annotators%22:%20%22tokenize,ssplit,pos,ner%22,%20%22outputFormat%22:%20%22json%22%7D' -d 'hello world, hello' | jq .
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging/Types.hs
View file @
da954a33
...
...
@@ -52,4 +52,3 @@ data PosSentences = PosSentences { _sentences :: [Sentence]}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"_"
)
''
P
osSentences
)
src/Gargantext/Core/Text/Terms/WithList.hs
View file @
da954a33
...
...
@@ -21,6 +21,8 @@ import Data.Text (Text, concat, unwords)
import
Gargantext.Prelude
import
Gargantext.Core.Text.Context
import
Gargantext.Core.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Core.Utils
(
groupWithCounts
)
import
Prelude
(
error
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
qualified
Data.IntMap.Strict
as
IntMap
...
...
@@ -71,8 +73,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
--------------------------------------------------------------------------
-- Utils
type
MatchedText
=
Text
termsInText
::
Patterns
->
Text
->
[
MatchedText
]
termsInText
pats
txt
=
List
.
nub
termsInText
::
Patterns
->
Text
->
[
(
MatchedText
,
TermsCount
)
]
termsInText
pats
txt
=
groupWithCounts
$
List
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
...
...
src/Gargantext/Core/Types.hs
View file @
da954a33
...
...
@@ -17,7 +17,7 @@ commentary with @some markup@.
module
Gargantext.Core.Types
(
module
Gargantext
.
Core
.
Types
.
Main
,
module
Gargantext
.
Database
.
Admin
.
Types
.
Node
,
DebugMode
(
..
),
withDebugMode
,
Term
,
Terms
(
..
)
,
Term
,
Terms
(
..
)
,
TermsCount
,
TermsWithCount
,
TokenTag
(
..
),
POS
(
..
),
NER
(
..
)
,
Label
,
Stems
,
HasInvalidError
(
..
),
assertValid
...
...
@@ -70,10 +70,13 @@ type Label = [Text]
data
Terms
=
Terms
{
_terms_label
::
Label
,
_terms_stem
::
Stems
}
deriving
(
Ord
,
Show
)
instance
Eq
Terms
where
(
==
)
(
Terms
_
s1
)
(
Terms
_
s2
)
=
s1
==
s2
type
TermsCount
=
Int
type
TermsWithCount
=
(
Terms
,
TermsCount
)
------------------------------------------------------------------------
data
Tag
=
POS
|
NER
deriving
(
Show
,
Eq
)
...
...
@@ -208,5 +211,3 @@ data TODO = TODO
instance
ToSchema
TODO
where
instance
ToParamSchema
TODO
where
----------------------------------------------------------------------------
src/Gargantext/Core/Utils.hs
View file @
da954a33
...
...
@@ -19,9 +19,12 @@ module Gargantext.Core.Utils (
,
alphanum
,
choices
,
randomString
,
groupWithCounts
,
addTuples
)
where
import
Data.Char
(
chr
,
ord
)
import
qualified
Data.List
as
List
import
Data.Maybe
import
Data.Monoid
import
Data.Text
(
Text
,
pack
)
...
...
@@ -57,3 +60,17 @@ randomString :: Int -> IO Text
randomString
num
=
do
str
<-
choices
num
alphanum
pure
$
pack
str
-- | Given a list of items of type 'a', return list with unique items
-- (like List.nub) but tuple-d with their counts in the original list
groupWithCounts
::
(
Ord
a
,
Eq
a
)
=>
[
a
]
->
[(
a
,
Int
)]
groupWithCounts
=
map
f
.
List
.
group
.
List
.
sort
where
f
[]
=
panic
"[groupWithCounts] impossible"
f
ts
@
(
t
:
_
)
=
(
t
,
length
ts
)
addTuples
::
(
Num
a
,
Num
b
)
=>
(
a
,
b
)
->
(
a
,
b
)
->
(
a
,
b
)
addTuples
(
a1
,
b1
)
(
a2
,
b2
)
=
(
a1
+
a2
,
b1
+
b2
)
src/Gargantext/Core/Viz/Phylo/Legacy/LegacyMain.hs
View file @
da954a33
...
...
@@ -65,7 +65,7 @@ flowPhylo cId = do
patterns
=
buildPatterns
termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
Date
,
Text
)
->
(
Date
,
[
Text
])
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
termsInText
patterns'
d
)
filterTerms
patterns'
(
y
,
d
)
=
(
y
,
fst
<$>
termsInText
patterns'
d
)
docs
=
map
((
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
docs'
...
...
@@ -123,4 +123,3 @@ writePhylo _fp _phview = undefined
-- refactor 2021
-- viewPhylo2Svg :: PhyloView -> IO DB.ByteString
-- viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
src/Gargantext/Database/Action/Flow.hs
View file @
da954a33
...
...
@@ -48,7 +48,7 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
where
import
Conduit
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
,
over
,
traverse
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Either
...
...
@@ -60,7 +60,6 @@ import Data.Maybe (catMaybes)
import
Data.Monoid
import
Data.Swagger
import
qualified
Data.Text
as
T
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
Servant.Client
(
ClientError
)
...
...
@@ -83,9 +82,10 @@ import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(.
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Core.Types
(
POS
(
NP
))
import
Gargantext.Core.Types
(
POS
(
NP
)
,
TermsCount
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Main
import
Gargantext.Core.Utils
(
addTuples
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.List
import
Gargantext.Database.Action.Flow.Types
...
...
@@ -357,25 +357,27 @@ insertMasterDocs c lang hs = do
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
mapNgramsDocs'
::
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure
ids'
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
saveDocNgramsWith
::
(
FlowCmdM
env
err
m
)
=>
ListId
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
Int
))
->
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
--printDebug "terms2id" terms2id
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
let
mapNgramsDocsNoCount
=
over
(
traverse
.
traverse
.
traverse
)
fst
mapNgramsDocs'
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocsNoCount
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
...
@@ -392,7 +394,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms''
,
mapNgramsTypes
)
<-
HashMap
.
toList
mapNgramsDocs
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
,
(
nId
,
(
w
,
_cnt
)
)
<-
Map
.
toList
mapNodeIdWeight
]
-- to be removed
...
...
@@ -451,7 +453,7 @@ mergeData rs = catMaybes . map toDocumentWithId . Map.toList
------------------------------------------------------------------------
documentIdWithNgrams
::
HasNodeError
err
=>
(
a
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
)))
->
Cmd
err
(
HashMap
b
(
Map
NgramsType
Int
,
TermsCount
)))
->
[
Indexed
NodeId
a
]
->
Cmd
err
[
DocumentIdWithNgrams
a
b
]
documentIdWithNgrams
f
=
traverse
toDocumentIdWithNgrams
...
...
@@ -466,13 +468,17 @@ mapNodeIdNgrams :: (Ord b, Hashable b)
=>
[
DocumentIdWithNgrams
a
b
]
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
)
(
Map
NodeId
(
Int
,
TermsCount
)
)
)
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
(
+
)
))
.
fmap
f
mapNodeIdNgrams
=
HashMap
.
unionsWith
(
Map
.
unionWith
(
Map
.
unionWith
addTuples
))
.
fmap
f
where
-- | NOTE We are somehow multiplying 'TermsCount' here: If the
-- same ngrams term has different ngrams types, the 'TermsCount'
-- for it (which is the number of times the terms appears in a
-- document) is copied over to all its types.
f
::
DocumentIdWithNgrams
a
b
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
Int
))
f
d
=
fmap
(
fmap
(
Map
.
singleton
nId
)
)
$
documentNgrams
d
->
HashMap
b
(
Map
NgramsType
(
Map
NodeId
(
Int
,
TermsCount
)
))
f
d
=
fmap
(
\
(
ngramsTypeMap
,
cnt
)
->
fmap
(
\
i
->
Map
.
singleton
nId
(
i
,
cnt
))
ngramsTypeMap
)
$
documentNgrams
d
where
nId
=
_index
$
documentWithId
d
...
...
@@ -483,25 +489,25 @@ instance ExtractNgramsT HyperdataContact
extractNgramsT
l
hc
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extract
l
hc
where
extract
::
TermType
Lang
->
HyperdataContact
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extract
_l
hc'
=
do
let
authors
=
map
text2ngrams
$
maybe
[
"Nothing"
]
(
\
a
->
[
a
])
$
view
(
hc_who
.
_Just
.
cw_lastName
)
hc'
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
a'
,
(
Map
.
singleton
Authors
1
,
1
)
)
|
a'
<-
authors
]
instance
ExtractNgramsT
HyperdataDocument
where
extractNgramsT
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extractNgramsT
lang
hd
=
HashMap
.
mapKeys
(
cleanExtractedNgrams
255
)
<$>
extractNgramsT'
lang
hd
where
extractNgramsT'
::
TermType
Lang
->
HyperdataDocument
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
))
->
Cmd
err
(
HashMap
ExtractedNgrams
(
Map
NgramsType
Int
,
TermsCount
))
extractNgramsT'
lang'
doc
=
do
let
source
=
text2ngrams
$
maybe
"Nothing"
identity
...
...
@@ -515,23 +521,23 @@ instance ExtractNgramsT HyperdataDocument
$
maybe
[
"Nothing"
]
(
T
.
splitOn
", "
)
$
_hd_authors
doc
terms
'
<-
map
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
)
terms
WithCounts'
<-
map
(
\
(
t
,
cnt
)
->
(
enrichedTerms
(
lang'
^.
tt_lang
)
CoreNLP
NP
t
,
cnt
)
)
<$>
concat
<$>
liftBase
(
extractTerms
lang'
$
hasText
doc
)
pure
$
HashMap
.
fromList
$
[(
SimpleNgrams
source
,
Map
.
singleton
Sources
1
)
]
<>
[(
SimpleNgrams
i'
,
Map
.
singleton
Institutes
1
)
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
a'
,
Map
.
singleton
Authors
1
)
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
Map
.
singleton
NgramsTerms
1
)
|
t'
<-
term
s'
]
$
[(
SimpleNgrams
source
,
(
Map
.
singleton
Sources
1
,
1
)
)
]
<>
[(
SimpleNgrams
i'
,
(
Map
.
singleton
Institutes
1
,
1
)
)
|
i'
<-
institutes
]
<>
[(
SimpleNgrams
a'
,
(
Map
.
singleton
Authors
1
,
1
)
)
|
a'
<-
authors
]
<>
[(
EnrichedNgrams
t'
,
(
Map
.
singleton
NgramsTerms
1
,
cnt'
))
|
(
t'
,
cnt'
)
<-
termsWithCount
s'
]
instance
(
ExtractNgramsT
a
,
HasText
a
)
=>
ExtractNgramsT
(
Node
a
)
where
extractNgramsT
l
(
Node
_
_
_
_
_
_
_
h
)
=
extractNgramsT
l
h
extractNgramsT
l
(
Node
{
_node_hyperdata
=
h
}
)
=
extractNgramsT
l
h
instance
HasText
a
=>
HasText
(
Node
a
)
where
hasText
(
Node
_
_
_
_
_
_
_
h
)
=
hasText
h
hasText
(
Node
{
_node_hyperdata
=
h
}
)
=
hasText
h
...
...
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
da954a33
...
...
@@ -15,6 +15,7 @@ module Gargantext.Database.Action.Flow.Utils
import
Data.Map
(
Map
)
import
Data.HashMap.Strict
(
HashMap
)
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams
...
...
@@ -29,24 +30,24 @@ import qualified Data.HashMap.Strict as HashMap
data
DocumentIdWithNgrams
a
b
=
DocumentIdWithNgrams
{
documentWithId
::
Indexed
NodeId
a
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
)
,
documentNgrams
::
HashMap
b
(
Map
NgramsType
Int
,
TermsCount
)
}
deriving
(
Show
)
insertDocNgrams
::
ListId
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
Int
))
->
HashMap
(
Indexed
NgramsId
Ngrams
)
(
Map
NgramsType
(
Map
DocId
(
Int
,
TermsCount
)
))
->
Cmd
err
Int
insertDocNgrams
lId
m
=
insertContextNodeNgrams
ns
insertDocNgrams
lId
m
=
do
printDebug
"[insertDocNgrams] ns"
ns
insertContextNodeNgrams
ns
where
ns
=
[
ContextNodeNgrams
docId
lId
(
ng
^.
index
)
(
ngramsTypeId
t
)
(
fromIntegral
i
)
cnt
|
(
ng
,
t2n2i
)
<-
HashMap
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
docId
,
i
)
<-
DM
.
toList
n2i
,
(
docId
,
(
i
,
cnt
)
)
<-
DM
.
toList
n2i
]
-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
src/Gargantext/Database/Action/Search.hs
View file @
da954a33
...
...
@@ -14,7 +14,10 @@ module Gargantext.Database.Action.Search where
import
Control.Arrow
(
returnA
)
import
Control.Lens
((
^.
))
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
Data.Maybe
import
qualified
Data.Set
as
Set
import
Data.Text
(
Text
,
unpack
,
intercalate
)
import
Data.Time
(
UTCTime
)
import
Gargantext.Core
...
...
@@ -26,8 +29,11 @@ import Gargantext.Database.Query.Filter
import
Gargantext.Database.Query.Join
(
leftJoin5
)
import
Gargantext.Database.Query.Table.Node
import
Gargantext.Database.Query.Table.Context
import
Gargantext.Database.Query.Table.ContextNodeNgrams
(
queryContextNodeNgramsTable
)
import
Gargantext.Database.Query.Table.NodeContext
import
Gargantext.Database.Query.Table.NodeContext_NodeContext
import
Gargantext.Database.Schema.ContextNodeNgrams
(
ContextNodeNgramsPoly
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Node
import
Gargantext.Database.Schema.Context
import
Gargantext.Prelude
...
...
@@ -51,6 +57,75 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
restrict
-<
(
_ns_typename
row
)
.==
(
sqlInt4
$
toDBid
NodeDocument
)
returnA
-<
(
_ns_id
row
,
_ns_hyperdata
row
)
------------------------------------------------------------------------
-- | Search ngrams in documents, ranking them by TF-IDF. We narrow our
-- search only to map/candidate terms.
searchInCorpusWithNgrams
::
HasDBid
NodeType
=>
CorpusId
->
ListId
->
IsTrash
->
NgramsType
->
[[
Text
]]
->
Maybe
Offset
->
Maybe
Limit
->
Maybe
OrderBy
->
Cmd
err
[
FacetDoc
]
searchInCorpusWithNgrams
_cId
_lId
_t
_ngt
_q
_o
_l
_order
=
undefined
-- | Compute TF-IDF for all 'ngramIds' in given 'CorpusId'. In this
-- case only the "TF" part makes sense and so we only compute the
-- ratio of "number of times our terms appear in given document" and
-- "number of all terms in document" and return a sorted list of
-- document ids
tfidfAll
::
CorpusId
->
[
Int
]
->
Cmd
err
[
Int
]
tfidfAll
cId
ngramIds
=
do
let
ngramIdsSet
=
Set
.
fromList
ngramIds
docsWithNgrams
<-
runOpaQuery
(
queryCorpusWithNgrams
cId
ngramIds
)
::
Cmd
err
[(
Int
,
Int
,
Int
)]
-- NOTE The query returned docs with ANY ngramIds. We need to further
-- restrict to ALL ngramIds.
let
docsNgramsM
=
Map
.
fromListWith
(
Set
.
union
)
[
(
ctxId
,
Set
.
singleton
ngrams_id
)
|
(
ctxId
,
ngrams_id
,
_
)
<-
docsWithNgrams
]
let
docsWithAllNgramsS
=
Set
.
fromList
$
List
.
map
fst
$
List
.
filter
(
\
(
_
,
docNgrams
)
->
ngramIdsSet
==
Set
.
intersection
ngramIdsSet
docNgrams
)
$
Map
.
toList
docsNgramsM
let
docsWithAllNgrams
=
List
.
filter
(
\
(
ctxId
,
_
,
_
)
->
Set
.
member
ctxId
docsWithAllNgramsS
)
docsWithNgrams
printDebug
"[tfidfAll] docsWithAllNgrams"
docsWithAllNgrams
let
docsWithCounts
=
Map
.
fromListWith
(
+
)
[
(
ctxId
,
doc_count
)
|
(
ctxId
,
_
,
doc_count
)
<-
docsWithAllNgrams
]
printDebug
"[tfidfAll] docsWithCounts"
docsWithCounts
let
totals
=
[
(
ctxId
,
ngrams_id
,
fromIntegral
doc_count
::
Double
,
fromIntegral
(
fromMaybe
0
$
Map
.
lookup
ctxId
docsWithCounts
)
::
Double
)
|
(
ctxId
,
ngrams_id
,
doc_count
)
<-
docsWithAllNgrams
]
let
tfidf_sorted
=
List
.
sortOn
snd
[(
ctxId
,
doc_count
/
s
)
|
(
ctxId
,
_
,
doc_count
,
s
)
<-
totals
]
pure
$
List
.
map
fst
$
List
.
reverse
tfidf_sorted
-- | Query for searching the 'context_node_ngrams' table so that we
-- find docs with ANY given 'ngramIds'.
queryCorpusWithNgrams
::
CorpusId
->
[
Int
]
->
Select
(
Column
SqlInt4
,
Column
SqlInt4
,
Column
SqlInt4
)
queryCorpusWithNgrams
cId
ngramIds
=
proc
()
->
do
row
<-
queryContextNodeNgramsTable
-<
()
restrict
-<
(
_cnng_node_id
row
)
.==
(
pgNodeId
cId
)
restrict
-<
in_
(
sqlInt4
<$>
ngramIds
)
(
_cnng_ngrams_id
row
)
returnA
-<
(
_cnng_context_id
row
,
_cnng_ngrams_id
row
,
_cnng_doc_count
row
)
--returnA -< row
-- returnA -< ( _cnng_context_id row
-- , _cnng_node_id row
-- , _cnng_ngrams_id row
-- , _cnng_ngramsType row
-- , _cnng_weight row
-- , _cnng_doc_count row)
------------------------------------------------------------------------
-- | todo add limit and offset and order
searchInCorpus
::
HasDBid
NodeType
...
...
@@ -209,4 +284,3 @@ queryContactViaDoc =
)
)
->
Column
SqlBool
cond45
(
doc
,
(
corpus
,
(
_
,(
_
,
_
))))
=
doc
^.
cs_id
.==
corpus
^.
nc_context_id
src/Gargantext/Database/Query/Facet.hs
View file @
da954a33
src/Gargantext/Database/Query/Join.hs
View file @
da954a33
...
...
@@ -375,4 +375,3 @@ leftJoin9 q1 q2 q3 q4 q5 q6 q7 q8 q9
)
cond67
)
cond78
)
cond89
src/Gargantext/Database/Query/Table/ContextNodeNgrams.hs
View file @
da954a33
...
...
@@ -36,12 +36,13 @@ queryContextNodeNgramsTable = selectTable contextNodeNgramsTable
-- | Insert utils
insertContextNodeNgrams
::
[
ContextNodeNgrams
]
->
Cmd
err
Int
insertContextNodeNgrams
=
insertContextNodeNgramsW
.
map
(
\
(
ContextNodeNgrams
c
n
ng
nt
w
)
->
.
map
(
\
(
ContextNodeNgrams
c
n
ng
nt
w
dc
)
->
ContextNodeNgrams
(
pgContextId
c
)
(
pgNodeId
n
)
(
sqlInt4
ng
)
(
pgNgramsTypeId
nt
)
(
sqlDouble
w
)
(
sqlInt4
dc
)
)
insertContextNodeNgramsW
::
[
ContextNodeNgramsWrite
]
->
Cmd
err
Int
...
...
src/Gargantext/Database/Query/Table/Node/UpdateOpaleye.hs
View file @
da954a33
...
...
@@ -35,8 +35,9 @@ updateHyperdata i h = mkCmd $ \c -> putStrLn "before runUpdate_" >>
updateHyperdataQuery
::
ToJSON
a
=>
NodeId
->
a
->
Update
Int64
updateHyperdataQuery
i
h
=
seq
h'
$
trace
"updateHyperdataQuery: encoded JSON"
$
Update
{
uTable
=
nodeTable
,
uUpdateWith
=
updateEasy
(
\
(
Node
_ni
_nh
_nt
_nu
_np
_nn
_nd
_h
)
->
trace
"updating mate"
$
Node
_ni
_nh
_nt
_nu
_np
_nn
_nd
h'
,
uUpdateWith
=
updateEasy
(
\
(
Node
{
..
})
->
Node
{
_node_hyperdata
=
h'
,
..
}
-- -> trace "updating mate" $ Node _ni _nh _nt _nu _np _nn _nd h'
)
,
uWhere
=
(
\
row
->
{-trace "uWhere" $-}
_node_id
row
.==
pgNodeId
i
)
,
uReturning
=
rCount
...
...
@@ -63,5 +64,3 @@ updateNodesWithType_ :: ( HasNodeError err
updateNodesWithType_
nt
h
=
do
ns
<-
getNodesIdWithType
nt
mapM
(
\
n
->
updateHyperdata
n
h
)
ns
src/Gargantext/Database/Schema/ContextNodeNgrams.hs
View file @
da954a33
...
...
@@ -19,6 +19,7 @@ module Gargantext.Database.Schema.ContextNodeNgrams
where
import
Prelude
import
Gargantext.Core.Types
(
TermsCount
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Ngrams
(
NgramsTypeId
,
NgramsId
)
import
Gargantext.Database.Admin.Types.Node
...
...
@@ -26,15 +27,16 @@ import Gargantext.Database.Admin.Types.Node
type
ContextNodeNgrams
=
ContextNodeNgramsPoly
ContextId
ListId
NgramsId
NgramsTypeId
Double
ContextNodeNgramsPoly
ContextId
ListId
NgramsId
NgramsTypeId
Double
TermsCount
data
ContextNodeNgramsPoly
c
n
ngrams_id
ngt
w
data
ContextNodeNgramsPoly
c
n
ngrams_id
ngt
w
dc
=
ContextNodeNgrams
{
_cnng_context_id
::
!
c
,
_cnng_node_id
::
!
n
,
_cnng_ngrams_id
::
!
ngrams_id
,
_cnng_ngramsType
::
!
ngt
,
_cnng_weight
::
!
w
,
_cnng_doc_count
::
!
dc
}
deriving
(
Show
)
type
ContextNodeNgramsWrite
=
...
...
@@ -43,6 +45,7 @@ type ContextNodeNgramsWrite =
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlFloat8
)
(
Column
SqlInt4
)
type
ContextNodeNgramsRead
=
ContextNodeNgramsPoly
(
Column
SqlInt4
)
...
...
@@ -50,6 +53,7 @@ type ContextNodeNgramsRead =
(
Column
SqlInt4
)
(
Column
SqlInt4
)
(
Column
SqlFloat8
)
(
Column
SqlInt4
)
type
ContextNodeNgramsReadNull
=
ContextNodeNgramsPoly
(
Column
(
Nullable
SqlInt4
))
...
...
@@ -57,6 +61,7 @@ type ContextNodeNgramsReadNull =
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlInt4
))
(
Column
(
Nullable
SqlFloat8
))
(
Column
(
Nullable
SqlInt4
))
$
(
makeAdaptorAndInstance
"pContextNodeNgrams"
''
C
ontextNodeNgramsPoly
)
makeLenses
''
C
ontextNodeNgramsPoly
...
...
@@ -70,5 +75,6 @@ contextNodeNgramsTable = Table "context_node_ngrams"
,
_cnng_ngrams_id
=
requiredTableField
"ngrams_id"
,
_cnng_ngramsType
=
requiredTableField
"ngrams_type"
,
_cnng_weight
=
requiredTableField
"weight"
,
_cnng_doc_count
=
requiredTableField
"doc_count"
}
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment