Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
97edf05f
Commit
97edf05f
authored
Jan 19, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] group ngrams, connected (testing now)
parent
bb469f39
Pipeline
#1354
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
94 additions
and
36 deletions
+94
-36
List.hs
src/Gargantext/Core/Text/List.hs
+20
-5
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+4
-0
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+3
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+6
-2
NgramsPostag.hs
src/Gargantext/Database/Query/Table/NgramsPostag.hs
+31
-15
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+2
-1
Prelude.hs
src/Gargantext/Prelude.hs
+28
-10
No files found.
src/Gargantext/Core/Text/List.hs
View file @
97edf05f
...
...
@@ -37,9 +37,10 @@ import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Prelude
(
CmdM
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
selectLems
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
)
,
Ngrams
(
..
)
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
...
...
@@ -62,12 +63,12 @@ buildNgramsLists :: ( RepoCmdM env err m
,
HasTreeError
err
,
HasNodeError
err
)
=>
User
->
GroupParams
=>
GroupParams
->
User
->
UserCorpusId
->
MasterCorpusId
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
user
gp
uCid
mCid
=
do
buildNgramsLists
gp
user
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
user
uCid
mCid
gp
(
NgramsTerms
,
MapListSize
350
)
othersTerms
<-
mapM
(
buildNgramsOthersList
user
uCid
GroupIdentity
)
[
(
Authors
,
MapListSize
9
)
...
...
@@ -132,6 +133,20 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
)]
getGroupParams
::
(
HasNodeError
err
,
CmdM
env
err
m
,
RepoCmdM
env
err
m
,
HasTreeError
err
)
=>
GroupParams
->
Set
Ngrams
->
m
GroupParams
getGroupParams
gp
@
(
GroupWithPosTag
l
a
_m
)
ng
=
do
hashMap
<-
HashMap
.
fromList
<$>
selectLems
l
a
(
Set
.
toList
ng
)
pure
$
over
gwl_map
(
\
x
->
x
<>
hashMap
)
gp
getGroupParams
gp
_
=
pure
gp
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
...
...
@@ -160,7 +175,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
socialLists_Stemmed
=
addScoreStem
groupParams
(
HashMap
.
keysSet
allTerms
)
socialLists
printDebug
"socialLists_Stemmed"
socialLists_Stemmed
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists_Stemmed
allTerms
let
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
...
...
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
97edf05f
...
...
@@ -17,6 +17,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List.Group.WithStem
where
import
Control.Lens
(
makeLenses
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashSet
(
HashSet
)
import
Data.Map
(
Map
)
...
...
@@ -53,6 +54,7 @@ data StopSize = StopSize {unStopSize :: !Int}
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
-- | Lenses instances at the end of this file
data
GroupParams
=
GroupParams
{
unGroupParams_lang
::
!
Lang
,
unGroupParams_len
::
!
Int
,
unGroupParams_limit
::
!
Int
...
...
@@ -124,3 +126,5 @@ toNgramsPatch children = NgramsPatch children' Patch.Keep
$
PatchMap
.
fromList
$
List
.
zip
children
(
List
.
cycle
[
addPatch
])
-- | Instances
makeLenses
''
G
roupParams
src/Gargantext/Core/Text/Terms.hs
View file @
97edf05f
...
...
@@ -151,12 +151,12 @@ insertExtractedNgrams :: [ ExtractedNgrams ] -> Cmd err (HashMap Text NgramsId)
insertExtractedNgrams
ngs
=
do
let
(
s
,
e
)
=
List
.
partition
isSimpleNgrams
ngs
m1
<-
insertNgrams
(
map
unSimpleNgrams
s
)
printDebug
"others"
m1
--
printDebug "others" m1
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
printDebug
"terms"
m2
--
printDebug "terms" m2
let
result
=
HashMap
.
union
s
[
m1
,
m2
]
let
result
=
HashMap
.
union
m1
m2
pure
result
isSimpleNgrams
::
ExtractedNgrams
->
Bool
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
97edf05f
...
...
@@ -68,7 +68,7 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import
Gargantext.Core.Ext.IMTUser
(
deserialiseImtUsersFromFile
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Text
import
Gargantext.Core.Text.List.Group.WithStem
(
StopSize
(
..
),
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.Terms
...
...
@@ -231,7 +231,11 @@ flowCorpusUser l user corpusName ctype ids = do
-- User List Flow
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
ngs
<-
buildNgramsLists
user
(
GroupParams
l
2
3
(
StopSize
3
))
userCorpusId
masterCorpusId
-- let gp = (GroupParams l 2 3 (StopSize 3))
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
gp
user
userCorpusId
masterCorpusId
_userListId
<-
flowList_DbRepo
listId
ngs
_mastListId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
...
...
src/Gargantext/Database/Query/Table/NgramsPostag.hs
View file @
97edf05f
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Database.Query.Table.NgramsPostag
where
import
Control.Lens
(
view
)
import
Control.Lens
(
view
,
(
^.
)
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.Text
(
Text
)
...
...
@@ -25,6 +25,7 @@ import Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Query.Table.Ngrams
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
...
...
@@ -38,11 +39,10 @@ data NgramsPostag = NgramsPostag { _np_lang :: Lang
,
_np_lem
::
Ngrams
}
deriving
(
Eq
,
Ord
,
Generic
,
Show
)
makeLenses
''
N
gramsPostag
instance
Hashable
NgramsPostag
type
NgramsPostagInsert
=
(
Int
,
Int
,
Text
...
...
@@ -64,12 +64,25 @@ toInsert (NgramsPostag l a p form lem) =
)
insertNgramsPostag
::
[
NgramsPostag
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertNgramsPostag
n
s
=
if
List
.
null
n
s
insertNgramsPostag
x
s
=
if
List
.
null
x
s
then
pure
HashMap
.
empty
else
HashMap
.
fromList
<$>
map
(
\
(
Indexed
t
i
)
->
(
t
,
i
))
<$>
insertNgramsPostag'
(
map
toInsert
ns
)
else
do
-- We do not store the lem if it equals to its self form
let
(
ns
,
nps
)
=
List
.
partition
(
\
np
->
np
^.
np_form
.
ngramsTerms
/=
np
^.
np_lem
.
ngramsTerms
)
xs
ns'
<-
insertNgrams
(
map
(
view
np_form
)
ns
)
nps'
<-
HashMap
.
fromList
<$>
map
(
\
(
Indexed
t
i
)
->
(
t
,
i
))
<$>
insertNgramsPostag'
(
map
toInsert
ns
)
pure
$
HashMap
.
union
ns'
nps'
insertNgramsPostag'
::
[
NgramsPostagInsert
]
->
Cmd
err
[
Indexed
Text
Int
]
insertNgramsPostag'
ns
=
runPGSQuery
queryInsertNgramsPostag
(
PGS
.
Only
$
Values
fields
ns
)
...
...
@@ -119,13 +132,15 @@ queryInsertNgramsPostag = [sql|
)
------------------------------------------------
------------------------------------------------
, ins_postag AS ( INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id, 1
, ins_postag AS (
INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id, count(*) as s
FROM input_rows ir
JOIN ins_form_ret form ON form.terms = ir.form
JOIN ins_lem_ret lem ON lem.terms = ir.lem
GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id
ORDER BY s DESC
LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO UPDATE SET score = ngrams_postag.score + 1
)
...
...
@@ -135,9 +150,10 @@ SELECT terms,id FROM ins_form_ret
|]
selectLems
::
[
Ngrams
]
->
Cmd
err
[(
Form
,
Lem
)]
selectLems
ns
=
runPGSQuery
querySelectLems
(
PGS
.
Only
$
Values
fields
(
map
toRow
ns
))
-- TODO add lang and postag algo
-- TODO remove when form == lem in insert
selectLems
::
Lang
->
PosTagAlgo
->
[
Ngrams
]
->
Cmd
err
[(
Form
,
Lem
)]
selectLems
l
a
ns
=
runPGSQuery
querySelectLems
(
PGS
.
Only
$
Values
fields
(
map
toRow
ns
))
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
97edf05f
...
...
@@ -180,7 +180,8 @@ instance Functor NgramsT where
-----------------------------------------------------------------------
withMap
::
HashMap
Text
NgramsId
->
Text
->
NgramsId
withMap
m
n
=
maybe
(
panic
"withMap: should not happen"
)
identity
(
HashMap
.
lookup
n
m
)
withMap
m
n
=
maybe
(
panic
$
"[G.D.S.Ngrams.withMap] Should not happen"
<>
(
cs
$
show
n
))
identity
(
HashMap
.
lookup
n
m
)
indexNgramsT
::
HashMap
Text
NgramsId
->
NgramsT
Ngrams
->
NgramsT
(
Indexed
Int
Ngrams
)
indexNgramsT
=
fmap
.
indexNgramsWith
.
withMap
...
...
src/Gargantext/Prelude.hs
View file @
97edf05f
...
...
@@ -16,21 +16,21 @@ Portability : POSIX
module
Gargantext.Prelude
(
module
Gargantext
.
Prelude
,
module
Protolude
,
headMay
,
lastMay
,
module
GHC
.
Err
.
Located
,
module
Text
.
Show
,
module
Text
.
Read
,
cs
,
module
Data
.
Maybe
,
round
,
sortWith
,
module
Prelude
,
MonadBase
(
..
)
,
Typeable
,
cs
,
headMay
,
lastMay
,
sortWith
,
round
)
where
import
Control.Monad.Base
(
MonadBase
(
..
))
import
Data.Set
(
Set
)
import
GHC.Exts
(
sortWith
)
import
GHC.Err.Located
(
undefined
)
import
GHC.Real
(
round
)
...
...
@@ -71,15 +71,16 @@ import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
-- TODO import functions optimized in Utils.Count
-- import Protolude hiding (head, last, all, any, sum, product, length)
-- import Gargantext.Utils.Count
import
qualified
Data.List
as
L
hiding
(
head
,
sum
)
import
qualified
Control.Monad
as
M
import
qualified
Data.Map
as
M
import
Data.Map.Strict
(
insertWith
)
import
qualified
Data.Vector
as
V
import
Data.String.Conversions
(
cs
)
import
Safe
(
headMay
,
lastMay
,
initMay
,
tailMay
)
import
Text.Show
(
Show
(),
show
)
import
Text.Read
(
Read
())
import
Data.String.Conversions
(
cs
)
import
Text.Show
(
Show
(),
show
)
import
qualified
Control.Monad
as
M
import
qualified
Data.List
as
L
hiding
(
head
,
sum
)
import
qualified
Data.Map
as
M
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
V
printDebug
::
(
Show
a
,
MonadBase
IO
m
)
=>
[
Char
]
->
a
->
m
()
...
...
@@ -338,3 +339,20 @@ instance Monoid Integer where
instance
Semigroup
Integer
where
(
<>
)
a
b
=
a
+
b
------------------------------------------------------------------------
hasDuplicates
::
Ord
a
=>
[
a
]
->
Bool
hasDuplicates
=
hasDuplicatesWith
Set
.
empty
hasDuplicatesWith
::
Ord
a
=>
Set
a
->
[
a
]
->
Bool
hasDuplicatesWith
_seen
[]
=
False
-- base case: empty lists never contain duplicates
hasDuplicatesWith
seen
(
x
:
xs
)
=
-- If we have seen the current item before, we can short-circuit; otherwise,
-- we'll add it the the set of previously seen items and process the rest of the
-- list against that.
x
`
Set
.
member
`
seen
||
hasDuplicatesWith
(
Set
.
insert
x
seen
)
xs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment