Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
2b0c0c9b
Commit
2b0c0c9b
authored
May 10, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Score by Doc or Corpus.
parent
7643b2ea
Pipeline
#379
failed with stage
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
69 additions
and
26 deletions
+69
-26
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+18
-7
Flow.hs
src/Gargantext/Database/Flow.hs
+2
-5
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+42
-5
List.hs
src/Gargantext/Text/List.hs
+7
-9
No files found.
src/Gargantext/API/Ngrams.hs
View file @
2b0c0c9b
...
...
@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import
Gargantext.Database.Config
(
userMaster
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getOccByNgramsOnlyS
afe
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getOccByNgramsOnlyS
low
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Utils
(
fromField'
,
HasConnection
)
...
...
@@ -236,7 +236,7 @@ ngramsElementFromRepo
,
_ne_parent
=
p
,
_ne_children
=
c
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
panic
"API.Ngrams._ne_occurrences"
,
_ne_occurrences
=
0
--
panic "API.Ngrams._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
...
...
@@ -875,14 +875,14 @@ type MaxSize = Int
getTableNgrams
::
(
RepoCmdM
env
err
m
,
HasNodeError
err
,
HasConnection
env
)
=>
NodeId
->
TabType
=>
Node
Type
->
Node
Id
->
TabType
->
ListId
->
Limit
->
Maybe
Offset
->
Maybe
ListType
->
Maybe
MinSize
->
Maybe
MaxSize
->
Maybe
OrderBy
->
(
NgramsTerm
->
Bool
)
->
m
(
Versioned
NgramsTable
)
getTableNgrams
nId
tabType
listId
limit_
offset
getTableNgrams
n
Type
n
Id
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
=
do
lIds
<-
selectNodesWithUsername
NodeList
userMaster
...
...
@@ -902,12 +902,14 @@ getTableNgrams nId tabType listId limit_ offset
selected_inner
roots
n
=
maybe
False
(`
Set
.
member
`
roots
)
(
n
^.
ne_root
)
---------------------------------------
sortOnOrder
Nothing
=
identity
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
ne_occurrences
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
ne_occurrences
---------------------------------------
selectAndPaginate
tableMap
(
NgramsTable
list
)
=
NgramsTable
$
roots
<>
inners
where
rootOf
ne
=
maybe
ne
(
\
r
->
ngramsElementFromRepo
(
r
,
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
v_data
.
at
r
)))
...
...
@@ -920,14 +922,20 @@ getTableNgrams nId tabType listId limit_ offset
rootsSet
=
Set
.
fromList
(
_ne_ngrams
<$>
roots
)
inners
=
list
&
filter
(
selected_inner
rootsSet
)
---------------------------------------
setScores
False
table
=
pure
table
setScores
True
table
=
do
occurrences
<-
getOccByNgramsOnlySafe
nId
(
lIds
<>
[
listId
])
ngramsType
(
table
^..
v_data
.
_NgramsTable
.
each
.
ne_ngrams
)
let
ngrams_terms
=
(
table
^..
v_data
.
_NgramsTable
.
each
.
ne_ngrams
)
occurrences
<-
getOccByNgramsOnlySlow
nType
nId
(
lIds
<>
[
listId
])
ngramsType
ngrams_terms
let
setOcc
ne
=
ne
&
ne_occurrences
.~
sumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
pure
$
table
&
v_data
.
_NgramsTable
.
each
%~
setOcc
---------------------------------------
-- lists <- catMaybes <$> listsWith userMaster
-- trace (show lists) $
...
...
@@ -1003,7 +1011,7 @@ getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env
->
Maybe
Text
-- full text search
->
m
(
Versioned
NgramsTable
)
getTableNgramsCorpus
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
mt
=
getTableNgrams
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
getTableNgrams
NodeCorpus
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
where
searchQuery
=
maybe
(
const
True
)
isInfixOf
mt
...
...
@@ -1021,7 +1029,10 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
let
ngramsType
=
ngramsTypeFromTabType
tabType
ngs
<-
selectNgramsByDoc
(
ns
<>
[
listId
])
dId
ngramsType
let
searchQuery
=
flip
S
.
member
(
S
.
fromList
ngs
)
getTableNgrams
dId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
getTableNgrams
NodeDocument
dId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
...
...
src/Gargantext/Database/Flow.hs
View file @
2b0c0c9b
...
...
@@ -28,6 +28,7 @@ Portability : POSIX
module
Gargantext.Database.Flow
-- (flowDatabase, ngrams2list)
where
--import Debug.Trace (trace)
import
Control.Lens
((
^.
),
view
,
Lens
'
,
_Just
)
import
Control.Monad
(
mapM_
)
import
Control.Monad.IO.Class
(
liftIO
)
...
...
@@ -126,7 +127,6 @@ flowCorpusSearchInDatabase u la q = do
------------------------------------------------------------------------
flow
::
(
FlowCmdM
env
ServantErr
m
,
FlowCorpus
a
,
MkCorpus
c
)
=>
Maybe
c
->
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
CorpusId
flow
c
u
cn
la
docs
=
do
...
...
@@ -177,10 +177,7 @@ insertMasterDocs c lang hs = do
ids
<-
insertDb
masterUserId
masterCorpusId
hs'
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
DM
.
fromList
$
map
viewUniqId'
hs'
)
docsWithNgrams
<-
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
let
maps
=
mapNodeIdNgrams
docsWithNgrams
maps
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
terms2id
<-
insertNgrams
$
DM
.
keys
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
2b0c0c9b
...
...
@@ -162,17 +162,21 @@ getOccByNgramsOnlyFast cId nt ngs =
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
getOccByNgramsOnlySlow
::
NodeType
->
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnlySlow
cId
ls
nt
ngs
=
Map
.
map
Set
.
size
<$>
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
getOccByNgramsOnlySlow
t
cId
ls
nt
ngs
=
Map
.
map
Set
.
size
<$>
getScore'
t
cId
ls
nt
ngs
where
getScore'
NodeCorpus
=
getNodesByNgramsOnlyUser
getScore'
NodeDocument
=
getNgramsByDocOnlyUser
getScore'
_
=
getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnlySafe
cId
ls
nt
ngs
=
do
printDebug
"getOccByNgramsOnlySafe"
(
cId
,
nt
,
length
ngs
)
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
cId
ls
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
pure
slow
...
...
@@ -209,7 +213,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
getNodesByNgramsOnlyUser
::
Corpus
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
getNodesByNgramsOnlyUser
::
Node
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
...
...
@@ -248,6 +252,39 @@ queryNgramsOnlyByNodeUser = [sql|
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
,
ngramsTypeId
nt
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
=
[
sql
|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
WHERE nng.node2_id = ? -- DocId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
...
...
src/Gargantext/Text/List.hs
View file @
2b0c0c9b
...
...
@@ -55,28 +55,26 @@ buildNgramsLists l n m s uCid mCid = do
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
pure
$
Map
.
fromList
[(
nt
,
[
mkNgramsElement
t
CandidateTerm
Nothing
(
mSetFromList
[]
)
|
(
t
,
_ns
)
<-
Map
.
toList
ngs
let
all'
=
Map
.
toList
ngs
pure
$
(
toElements
GraphTerm
$
take
10
all'
)
<>
(
toElements
CandidateTerm
$
drop
10
all'
)
where
toElements
nType
x
=
Map
.
fromList
[(
nt
,
[
mkNgramsElement
t
nType
Nothing
(
mSetFromList
[]
)
|
(
t
,
_ns
)
<-
x
]
)
]
-- TODO remove hard coded parameters
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
((
isStopTerm
s
)
.
fst
)
candidates
--let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment