Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
149
Issues
149
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
be879b1e
Verified
Commit
be879b1e
authored
Jan 30, 2025
by
Przemyslaw Kaminski
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] code fixes according to review
Related MR:
!378
parent
bf89561b
Pipeline
#7287
failed with stages
in 45 minutes and 56 seconds
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
28 additions
and
60 deletions
+28
-60
Import.hs
bin/gargantext-cli/CLI/Import.hs
+0
-1
Share.hs
src/Gargantext/API/Node/Share.hs
+15
-19
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-19
Utils.hs
src/Gargantext/Database/Action/Flow/Utils.hs
+5
-14
Context.hs
src/Gargantext/Database/Schema/Context.hs
+2
-0
Terms.hs
test/Test/Ngrams/Terms.hs
+5
-7
No files found.
bin/gargantext-cli/CLI/Import.hs
View file @
be879b1e
...
...
@@ -75,7 +75,6 @@ import_p = fmap CCMD_import $ ImportArgs
<*>
(
option
str
(
long
"user"
)
)
<*>
(
option
str
(
long
"name"
)
)
<*>
settings_p
-- <*> (fmap Limit ( option auto ( long "limit" <> metavar "INT" <> help "The limit for the query") ))
<*>
(
option
str
(
long
"corpus-path"
<>
help
"Path to corpus file"
)
)
function_p
::
String
->
Either
String
ImportFunction
...
...
src/Gargantext/API/Node/Share.hs
View file @
be879b1e
...
...
@@ -55,25 +55,21 @@ api userInviting nId (ShareTeamParams user') = do
pure
u
Left
_err
->
do
username'
<-
getUsername
userInviting
if
username'
`
List
.
elem
`
arbitraryUsername
then
do
-- printDebug "[G.A.N.Share.api]" ("Demo users are not allowed to invite" :: Text)
pure
()
else
do
-- TODO better analysis of the composition of what is shared
children
<-
findNodesWithType
nId
[
NodeList
]
[
NodeFolderShared
,
NodeTeam
,
NodeFolder
,
NodeCorpus
]
_
<-
if
List
.
null
children
then
do
-- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
pure
$
UnsafeMkUserId
0
else
do
-- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
newUser
user''
pure
()
unless
(
username'
`
List
.
elem
`
arbitraryUsername
)
$
do
-- TODO better analysis of the composition of what is shared
children
<-
findNodesWithType
nId
[
NodeList
]
[
NodeFolderShared
,
NodeTeam
,
NodeFolder
,
NodeCorpus
]
_
<-
if
List
.
null
children
then
do
-- printDebug "[G.A.N.Share.api]" ("Invitation is enabled if you share a corpus at least" :: Text)
pure
$
UnsafeMkUserId
0
else
do
-- printDebug "[G.A.N.Share.api]" ("Your invitation is sent to: " <> user'')
newUser
user''
pure
()
pure
u
fromIntegral
<$>
DB
.
shareNodeWith
(
ShareNodeWith_User
NodeFolderShared
(
UserName
user
))
nId
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
be879b1e
...
...
@@ -389,12 +389,6 @@ buildSocialList l user userCorpusId listId ctype mfslw = do
<-
getOrMkRootWithCorpus
MkCorpusUserMaster
ctype
nlpServer
<-
view
(
nlpServerGet
l
)
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according to the Lang
-- let gp = GroupParams { unGroupParams_lang = l
-- , unGroupParams_len = 10
-- , unGroupParams_limit = 10
-- , unGroupParams_stopSize = StopSize 10 }
let
gp
=
GroupWithPosTag
l
nlpServer
HashMap
.
empty
ngs
<-
buildNgramsLists
user
userCorpusId
masterCorpusId
mfslw
gp
...
...
@@ -424,7 +418,6 @@ insertMasterDocs ncs c lang hs = do
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
mapNgramsDocs'
::
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
...
...
@@ -432,10 +425,8 @@ insertMasterDocs ncs c lang hs = do
(
map
(
B
.
first
contextId2NodeId
)
documentsWithId
)
lId
<-
getOrMkList
masterCorpusId
masterUserId
-- _ <- saveDocNgramsWith lId mapNgramsDocs'
_
<-
saveDocNgramsWith
lId
mapNgramsDocs'
-- _cooc <- insertDefaultNode NodeListCooc lId masterUserId
pure
$
map
contextId2NodeId
ids'
...
...
@@ -444,9 +435,6 @@ saveDocNgramsWith :: (IsDBCmd env err m)
->
HashMap
.
HashMap
ExtractedNgrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
--printDebug "[saveDocNgramsWith] mapNgramsDocs'" mapNgramsDocs'
-- let mapNgramsDocsNoCount :: HashMap.HashMap ExtractedNgrams (Map NgramsType (Map NodeId Int))
-- mapNgramsDocsNoCount = over (traverse . traverse . traverse) fst mapNgramsDocs'
(
terms2id
::
HashMap
.
HashMap
Text
NgramsId
)
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
let
mapNgramsDocs
::
HashMap
.
HashMap
Ngrams
(
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
...
...
@@ -457,8 +445,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
bimap
_ngramsTerms
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
--printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
let
ngrams2insert
=
catMaybes
[
ContextNodeNgrams2
(
nodeId2ContextId
nId
)
<$>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms''
)
<*>
Just
(
fromIntegral
$
unTermsWeight
w
::
Double
)
...
...
@@ -466,7 +452,6 @@ saveDocNgramsWith lId mapNgramsDocs' = do
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
(
w
,
_cnt
))
<-
Map
.
toList
mapNodeIdWeight
]
-- printDebug "Ngrams2Insert" ngrams2insert
_return
<-
insertContextNodeNgrams2
ngrams2insert
-- to be removed
...
...
@@ -501,9 +486,6 @@ reIndexWith cId lId nt lts = do
-- Get all documents of the corpus
(
docs
::
[
ContextOnlyId
HyperdataDocument
])
<-
selectDocNodesOnlyId
cId
let
ngramsByDoc'
=
ngramsByDoc
corpusLang
nt
ts
docs
-- Saving the indexation in database
mapM_
(
saveDocNgramsWith
lId
)
ngramsByDoc'
pure
()
mapM_
(
saveDocNgramsWith
lId
.
ngramsByDoc
corpusLang
nt
ts
)
docs
src/Gargantext/Database/Action/Flow/Utils.hs
View file @
be879b1e
...
...
@@ -185,23 +185,14 @@ toInserted =
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
[
ContextOnlyId
HyperdataDocument
]
->
[
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))]
ngramsByDoc
l
nt
ts
docs
=
ngramsByDoc'
l
nt
ts
<$>
docs
-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc'
::
Lang
->
NgramsType
->
[
NT
.
NgramsTerm
]
->
ContextOnlyId
HyperdataDocument
->
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
ngramsByDoc'
l
nt
ts
doc
=
->
ContextOnlyId
HyperdataDocument
->
HashMap
.
HashMap
ExtractedNgrams
(
DM
.
Map
NgramsType
(
Map
NodeId
(
TermsWeight
,
TermsCount
)))
ngramsByDoc
l
nt
ts
doc
=
HashMap
.
map
(
\
cnt
->
DM
.
singleton
nt
$
DM
.
singleton
nId
(
1
,
cnt
))
extractedMap
where
matched
::
[(
MatchedText
,
TermsCount
)]
...
...
src/Gargantext/Database/Schema/Context.hs
View file @
be879b1e
...
...
@@ -54,6 +54,8 @@ $(makeLensesWith abbreviatedFields ''ContextPoly)
------------------------------------------------------------------------
-- | This datatype describes queries in the `contexts` table, where
-- only `id` and `hyperdata` are fetched.
data
ContextPolyOnlyId
id
hyperdata
=
ContextOnlyId
{
_context_oid_id
::
!
id
,
_context_oid_hyperdata
::
!
hyperdata
}
...
...
test/Test/Ngrams/Terms.hs
View file @
be879b1e
...
...
@@ -142,18 +142,16 @@ testNgramsByDoc01 = do
,
_hd_abstract
=
Nothing
}
let
ctx2
=
ContextOnlyId
2
hd2
ngramsByDoc
EN
NgramsTerms
terms
[
ctx1
,
ctx2
]
@?=
[
HashMap
.
fromList
ngramsByDoc
EN
NgramsTerms
terms
ctx1
@?=
HashMap
.
fromList
[
(
SimpleNgrams
$
UnsafeNgrams
{
_ngramsTerms
=
"hello"
,
_ngramsSize
=
1
}
,
Map
.
singleton
NgramsTerms
$
Map
.
singleton
(
UnsafeMkNodeId
1
)
(
1
,
1
)
)
,
(
SimpleNgrams
$
UnsafeNgrams
{
_ngramsTerms
=
"world"
,
_ngramsSize
=
1
}
,
Map
.
singleton
NgramsTerms
$
Map
.
singleton
(
UnsafeMkNodeId
1
)
(
1
,
1
)
)
]
,
HashMap
.
fromList
ngramsByDoc
EN
NgramsTerms
terms
ctx2
@?=
HashMap
.
fromList
[
(
SimpleNgrams
$
UnsafeNgrams
{
_ngramsTerms
=
"world"
,
_ngramsSize
=
1
}
,
Map
.
singleton
NgramsTerms
$
Map
.
singleton
(
UnsafeMkNodeId
2
)
(
1
,
2
)
)
]
]
ngramsByDoc
EN
NgramsTerms
terms
[
ctx1
,
ctx2
]
@?=
(
ngramsByDoc
EN
NgramsTerms
terms
[
ctx1
])
<>
(
ngramsByDoc
EN
NgramsTerms
terms
[
ctx2
])
Przemyslaw Kaminski
@cgenie
mentioned in commit
03b33383
·
Jan 30, 2025
mentioned in commit
03b33383
mentioned in commit 03b33383dd67c1821a4edb4628923cf7bd039d90
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment