Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
97edf05f
Commit
97edf05f
authored
Jan 19, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] group ngrams, connected (testing now)
parent
bb469f39
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
94 additions
and
36 deletions
+94
-36
List.hs
src/Gargantext/Core/Text/List.hs
+20
-5
WithStem.hs
src/Gargantext/Core/Text/List/Group/WithStem.hs
+4
-0
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+3
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+6
-2
NgramsPostag.hs
src/Gargantext/Database/Query/Table/NgramsPostag.hs
+31
-15
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+2
-1
Prelude.hs
src/Gargantext/Prelude.hs
+28
-10
No files found.
src/Gargantext/Core/Text/List.hs
View file @
97edf05f
...
...
@@ -37,9 +37,10 @@ import Gargantext.Database.Action.Metrics.TFICF (getTficf)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Prelude
(
CmdM
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.NgramsPostag
(
selectLems
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Database.Query.Tree.Error
(
HasTreeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
)
,
Ngrams
(
..
)
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
...
...
@@ -62,12 +63,12 @@ buildNgramsLists :: ( RepoCmdM env err m
,
HasTreeError
err
,
HasNodeError
err
)
=>
User
->
GroupParams
=>
GroupParams
->
User
->
UserCorpusId
->
MasterCorpusId
->
m
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
user
gp
uCid
mCid
=
do
buildNgramsLists
gp
user
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
user
uCid
mCid
gp
(
NgramsTerms
,
MapListSize
350
)
othersTerms
<-
mapM
(
buildNgramsOthersList
user
uCid
GroupIdentity
)
[
(
Authors
,
MapListSize
9
)
...
...
@@ -132,6 +133,20 @@ buildNgramsOthersList user uCid _groupParams (nt, MapListSize mapListSize) = do
)]
getGroupParams
::
(
HasNodeError
err
,
CmdM
env
err
m
,
RepoCmdM
env
err
m
,
HasTreeError
err
)
=>
GroupParams
->
Set
Ngrams
->
m
GroupParams
getGroupParams
gp
@
(
GroupWithPosTag
l
a
_m
)
ng
=
do
hashMap
<-
HashMap
.
fromList
<$>
selectLems
l
a
(
Set
.
toList
ng
)
pure
$
over
gwl_map
(
\
x
->
x
<>
hashMap
)
gp
getGroupParams
gp
_
=
pure
gp
-- TODO use ListIds
buildNgramsTermsList
::
(
HasNodeError
err
,
CmdM
env
err
m
...
...
@@ -160,7 +175,7 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
let
socialLists_Stemmed
=
addScoreStem
groupParams
(
HashMap
.
keysSet
allTerms
)
socialLists
printDebug
"socialLists_Stemmed"
socialLists_Stemmed
let
groupedWithList
=
toGroupedTree
{- groupParams -}
socialLists_Stemmed
allTerms
let
groupedWithList
=
toGroupedTree
socialLists_Stemmed
allTerms
(
stopTerms
,
candidateTerms
)
=
HashMap
.
partition
((
==
Just
StopTerm
)
.
viewListType
)
$
view
flc_scores
groupedWithList
...
...
src/Gargantext/Core/Text/List/Group/WithStem.hs
View file @
97edf05f
...
...
@@ -17,6 +17,7 @@ Portability : POSIX
module
Gargantext.Core.Text.List.Group.WithStem
where
import
Control.Lens
(
makeLenses
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashSet
(
HashSet
)
import
Data.Map
(
Map
)
...
...
@@ -53,6 +54,7 @@ data StopSize = StopSize {unStopSize :: !Int}
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
-- | Lenses instances at the end of this file
data
GroupParams
=
GroupParams
{
unGroupParams_lang
::
!
Lang
,
unGroupParams_len
::
!
Int
,
unGroupParams_limit
::
!
Int
...
...
@@ -124,3 +126,5 @@ toNgramsPatch children = NgramsPatch children' Patch.Keep
$
PatchMap
.
fromList
$
List
.
zip
children
(
List
.
cycle
[
addPatch
])
-- | Instances
makeLenses
''
G
roupParams
src/Gargantext/Core/Text/Terms.hs
View file @
97edf05f
...
...
@@ -151,12 +151,12 @@ insertExtractedNgrams :: [ ExtractedNgrams ] -> Cmd err (HashMap Text NgramsId)
insertExtractedNgrams
ngs
=
do
let
(
s
,
e
)
=
List
.
partition
isSimpleNgrams
ngs
m1
<-
insertNgrams
(
map
unSimpleNgrams
s
)
printDebug
"others"
m1
--
printDebug "others" m1
m2
<-
insertNgramsPostag
(
map
unEnrichedNgrams
e
)
printDebug
"terms"
m2
--
printDebug "terms" m2
let
result
=
HashMap
.
union
s
[
m1
,
m2
]
let
result
=
HashMap
.
union
m1
m2
pure
result
isSimpleNgrams
::
ExtractedNgrams
->
Bool
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
97edf05f
...
...
@@ -68,7 +68,7 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import
Gargantext.Core.Ext.IMTUser
(
deserialiseImtUsersFromFile
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Text
import
Gargantext.Core.Text.List.Group.WithStem
(
StopSize
(
..
),
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.Terms
...
...
@@ -231,7 +231,11 @@ flowCorpusUser l user corpusName ctype ids = do
-- User List Flow
(
masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
ngs
<-
buildNgramsLists
user
(
GroupParams
l
2
3
(
StopSize
3
))
userCorpusId
masterCorpusId
-- let gp = (GroupParams l 2 3 (StopSize 3))
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
gp
user
userCorpusId
masterCorpusId
_userListId
<-
flowList_DbRepo
listId
ngs
_mastListId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
...
...
src/Gargantext/Database/Query/Table/NgramsPostag.hs
View file @
97edf05f
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Database.Query.Table.NgramsPostag
where
import
Control.Lens
(
view
)
import
Control.Lens
(
view
,
(
^.
)
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Data.Text
(
Text
)
...
...
@@ -25,6 +25,7 @@ import Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Query.Table.Ngrams
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
...
...
@@ -38,11 +39,10 @@ data NgramsPostag = NgramsPostag { _np_lang :: Lang
,
_np_lem
::
Ngrams
}
deriving
(
Eq
,
Ord
,
Generic
,
Show
)
makeLenses
''
N
gramsPostag
instance
Hashable
NgramsPostag
type
NgramsPostagInsert
=
(
Int
,
Int
,
Text
...
...
@@ -64,12 +64,25 @@ toInsert (NgramsPostag l a p form lem) =
)
insertNgramsPostag
::
[
NgramsPostag
]
->
Cmd
err
(
HashMap
Text
NgramsId
)
insertNgramsPostag
n
s
=
if
List
.
null
n
s
insertNgramsPostag
x
s
=
if
List
.
null
x
s
then
pure
HashMap
.
empty
else
HashMap
.
fromList
<$>
map
(
\
(
Indexed
t
i
)
->
(
t
,
i
))
<$>
insertNgramsPostag'
(
map
toInsert
ns
)
else
do
-- We do not store the lem if it equals to its self form
let
(
ns
,
nps
)
=
List
.
partition
(
\
np
->
np
^.
np_form
.
ngramsTerms
/=
np
^.
np_lem
.
ngramsTerms
)
xs
ns'
<-
insertNgrams
(
map
(
view
np_form
)
ns
)
nps'
<-
HashMap
.
fromList
<$>
map
(
\
(
Indexed
t
i
)
->
(
t
,
i
))
<$>
insertNgramsPostag'
(
map
toInsert
ns
)
pure
$
HashMap
.
union
ns'
nps'
insertNgramsPostag'
::
[
NgramsPostagInsert
]
->
Cmd
err
[
Indexed
Text
Int
]
insertNgramsPostag'
ns
=
runPGSQuery
queryInsertNgramsPostag
(
PGS
.
Only
$
Values
fields
ns
)
...
...
@@ -119,13 +132,15 @@ queryInsertNgramsPostag = [sql|
)
------------------------------------------------
------------------------------------------------
, ins_postag AS ( INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id, 1
, ins_postag AS (
INSERT INTO ngrams_postag (lang_id, algo_id, postag, ngrams_id, lemm_id,score)
SELECT ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id, count(*) as s
FROM input_rows ir
JOIN ins_form_ret form ON form.terms = ir.form
JOIN ins_lem_ret lem ON lem.terms = ir.lem
GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id
ORDER BY s DESC
LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO UPDATE SET score = ngrams_postag.score + 1
)
...
...
@@ -135,9 +150,10 @@ SELECT terms,id FROM ins_form_ret
|]
selectLems
::
[
Ngrams
]
->
Cmd
err
[(
Form
,
Lem
)]
selectLems
ns
=
runPGSQuery
querySelectLems
(
PGS
.
Only
$
Values
fields
(
map
toRow
ns
))
-- TODO add lang and postag algo
-- TODO remove when form == lem in insert
selectLems
::
Lang
->
PosTagAlgo
->
[
Ngrams
]
->
Cmd
err
[(
Form
,
Lem
)]
selectLems
l
a
ns
=
runPGSQuery
querySelectLems
(
PGS
.
Only
$
Values
fields
(
map
toRow
ns
))
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
]
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
97edf05f
...
...
@@ -180,7 +180,8 @@ instance Functor NgramsT where
-----------------------------------------------------------------------
withMap
::
HashMap
Text
NgramsId
->
Text
->
NgramsId
withMap
m
n
=
maybe
(
panic
"withMap: should not happen"
)
identity
(
HashMap
.
lookup
n
m
)
withMap
m
n
=
maybe
(
panic
$
"[G.D.S.Ngrams.withMap] Should not happen"
<>
(
cs
$
show
n
))
identity
(
HashMap
.
lookup
n
m
)
indexNgramsT
::
HashMap
Text
NgramsId
->
NgramsT
Ngrams
->
NgramsT
(
Indexed
Int
Ngrams
)
indexNgramsT
=
fmap
.
indexNgramsWith
.
withMap
...
...
src/Gargantext/Prelude.hs
View file @
97edf05f
...
...
@@ -16,21 +16,21 @@ Portability : POSIX
module
Gargantext.Prelude
(
module
Gargantext
.
Prelude
,
module
Protolude
,
headMay
,
lastMay
,
module
GHC
.
Err
.
Located
,
module
Text
.
Show
,
module
Text
.
Read
,
cs
,
module
Data
.
Maybe
,
round
,
sortWith
,
module
Prelude
,
MonadBase
(
..
)
,
Typeable
,
cs
,
headMay
,
lastMay
,
sortWith
,
round
)
where
import
Control.Monad.Base
(
MonadBase
(
..
))
import
Data.Set
(
Set
)
import
GHC.Exts
(
sortWith
)
import
GHC.Err.Located
(
undefined
)
import
GHC.Real
(
round
)
...
...
@@ -71,15 +71,16 @@ import Prelude (Enum, Bounded, minBound, maxBound, putStrLn)
-- TODO import functions optimized in Utils.Count
-- import Protolude hiding (head, last, all, any, sum, product, length)
-- import Gargantext.Utils.Count
import
qualified
Data.List
as
L
hiding
(
head
,
sum
)
import
qualified
Control.Monad
as
M
import
qualified
Data.Map
as
M
import
Data.Map.Strict
(
insertWith
)
import
qualified
Data.Vector
as
V
import
Data.String.Conversions
(
cs
)
import
Safe
(
headMay
,
lastMay
,
initMay
,
tailMay
)
import
Text.Show
(
Show
(),
show
)
import
Text.Read
(
Read
())
import
Data.String.Conversions
(
cs
)
import
Text.Show
(
Show
(),
show
)
import
qualified
Control.Monad
as
M
import
qualified
Data.List
as
L
hiding
(
head
,
sum
)
import
qualified
Data.Map
as
M
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
V
printDebug
::
(
Show
a
,
MonadBase
IO
m
)
=>
[
Char
]
->
a
->
m
()
...
...
@@ -338,3 +339,20 @@ instance Monoid Integer where
instance
Semigroup
Integer
where
(
<>
)
a
b
=
a
+
b
------------------------------------------------------------------------
hasDuplicates
::
Ord
a
=>
[
a
]
->
Bool
hasDuplicates
=
hasDuplicatesWith
Set
.
empty
hasDuplicatesWith
::
Ord
a
=>
Set
a
->
[
a
]
->
Bool
hasDuplicatesWith
_seen
[]
=
False
-- base case: empty lists never contain duplicates
hasDuplicatesWith
seen
(
x
:
xs
)
=
-- If we have seen the current item before, we can short-circuit; otherwise,
-- we'll add it the the set of previously seen items and process the rest of the
-- list against that.
x
`
Set
.
member
`
seen
||
hasDuplicatesWith
(
Set
.
insert
x
seen
)
xs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment