Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
2b0c0c9b
Commit
2b0c0c9b
authored
May 10, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Score by Doc or Corpus.
parent
7643b2ea
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
69 additions
and
26 deletions
+69
-26
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+18
-7
Flow.hs
src/Gargantext/Database/Flow.hs
+2
-5
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+42
-5
List.hs
src/Gargantext/Text/List.hs
+7
-9
No files found.
src/Gargantext/API/Ngrams.hs
View file @
2b0c0c9b
...
...
@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import
Gargantext.Database.Config
(
userMaster
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getOccByNgramsOnlyS
afe
)
import
Gargantext.Database.Metrics.NgramsByNode
(
getOccByNgramsOnlyS
low
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Utils
(
fromField'
,
HasConnection
)
...
...
@@ -236,7 +236,7 @@ ngramsElementFromRepo
,
_ne_parent
=
p
,
_ne_children
=
c
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
panic
"API.Ngrams._ne_occurrences"
,
_ne_occurrences
=
0
--
panic "API.Ngrams._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
...
...
@@ -875,14 +875,14 @@ type MaxSize = Int
getTableNgrams
::
(
RepoCmdM
env
err
m
,
HasNodeError
err
,
HasConnection
env
)
=>
NodeId
->
TabType
=>
Node
Type
->
Node
Id
->
TabType
->
ListId
->
Limit
->
Maybe
Offset
->
Maybe
ListType
->
Maybe
MinSize
->
Maybe
MaxSize
->
Maybe
OrderBy
->
(
NgramsTerm
->
Bool
)
->
m
(
Versioned
NgramsTable
)
getTableNgrams
nId
tabType
listId
limit_
offset
getTableNgrams
n
Type
n
Id
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
=
do
lIds
<-
selectNodesWithUsername
NodeList
userMaster
...
...
@@ -902,12 +902,14 @@ getTableNgrams nId tabType listId limit_ offset
selected_inner
roots
n
=
maybe
False
(`
Set
.
member
`
roots
)
(
n
^.
ne_root
)
---------------------------------------
sortOnOrder
Nothing
=
identity
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
ne_occurrences
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
ne_occurrences
---------------------------------------
selectAndPaginate
tableMap
(
NgramsTable
list
)
=
NgramsTable
$
roots
<>
inners
where
rootOf
ne
=
maybe
ne
(
\
r
->
ngramsElementFromRepo
(
r
,
fromMaybe
(
panic
"getTableNgrams: invalid root"
)
(
tableMap
^.
v_data
.
at
r
)))
...
...
@@ -920,14 +922,20 @@ getTableNgrams nId tabType listId limit_ offset
rootsSet
=
Set
.
fromList
(
_ne_ngrams
<$>
roots
)
inners
=
list
&
filter
(
selected_inner
rootsSet
)
---------------------------------------
setScores
False
table
=
pure
table
setScores
True
table
=
do
occurrences
<-
getOccByNgramsOnlySafe
nId
(
lIds
<>
[
listId
])
ngramsType
(
table
^..
v_data
.
_NgramsTable
.
each
.
ne_ngrams
)
let
ngrams_terms
=
(
table
^..
v_data
.
_NgramsTable
.
each
.
ne_ngrams
)
occurrences
<-
getOccByNgramsOnlySlow
nType
nId
(
lIds
<>
[
listId
])
ngramsType
ngrams_terms
let
setOcc
ne
=
ne
&
ne_occurrences
.~
sumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
pure
$
table
&
v_data
.
_NgramsTable
.
each
%~
setOcc
---------------------------------------
-- lists <- catMaybes <$> listsWith userMaster
-- trace (show lists) $
...
...
@@ -1003,7 +1011,7 @@ getTableNgramsCorpus :: (RepoCmdM env err m, HasNodeError err, HasConnection env
->
Maybe
Text
-- full text search
->
m
(
Versioned
NgramsTable
)
getTableNgramsCorpus
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
mt
=
getTableNgrams
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
getTableNgrams
NodeCorpus
nId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
where
searchQuery
=
maybe
(
const
True
)
isInfixOf
mt
...
...
@@ -1021,7 +1029,10 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
let
ngramsType
=
ngramsTypeFromTabType
tabType
ngs
<-
selectNgramsByDoc
(
ns
<>
[
listId
])
dId
ngramsType
let
searchQuery
=
flip
S
.
member
(
S
.
fromList
ngs
)
getTableNgrams
dId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
getTableNgrams
NodeDocument
dId
tabType
listId
limit_
offset
listType
minSize
maxSize
orderBy
searchQuery
...
...
src/Gargantext/Database/Flow.hs
View file @
2b0c0c9b
...
...
@@ -28,6 +28,7 @@ Portability : POSIX
module
Gargantext.Database.Flow
-- (flowDatabase, ngrams2list)
where
--import Debug.Trace (trace)
import
Control.Lens
((
^.
),
view
,
Lens
'
,
_Just
)
import
Control.Monad
(
mapM_
)
import
Control.Monad.IO.Class
(
liftIO
)
...
...
@@ -126,7 +127,6 @@ flowCorpusSearchInDatabase u la q = do
------------------------------------------------------------------------
flow
::
(
FlowCmdM
env
ServantErr
m
,
FlowCorpus
a
,
MkCorpus
c
)
=>
Maybe
c
->
Username
->
CorpusName
->
TermType
Lang
->
[[
a
]]
->
m
CorpusId
flow
c
u
cn
la
docs
=
do
...
...
@@ -177,10 +177,7 @@ insertMasterDocs c lang hs = do
ids
<-
insertDb
masterUserId
masterCorpusId
hs'
let
documentsWithId
=
mergeData
(
toInserted
ids
)
(
DM
.
fromList
$
map
viewUniqId'
hs'
)
docsWithNgrams
<-
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
let
maps
=
mapNodeIdNgrams
docsWithNgrams
maps
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
lang
)
documentsWithId
terms2id
<-
insertNgrams
$
DM
.
keys
maps
let
indexedNgrams
=
DM
.
mapKeys
(
indexNgrams
terms2id
)
maps
...
...
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
2b0c0c9b
...
...
@@ -162,17 +162,21 @@ getOccByNgramsOnlyFast cId nt ngs =
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
getOccByNgramsOnlySlow
::
NodeType
->
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnlySlow
cId
ls
nt
ngs
=
Map
.
map
Set
.
size
<$>
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
getOccByNgramsOnlySlow
t
cId
ls
nt
ngs
=
Map
.
map
Set
.
size
<$>
getScore'
t
cId
ls
nt
ngs
where
getScore'
NodeCorpus
=
getNodesByNgramsOnlyUser
getScore'
NodeDocument
=
getNgramsByDocOnlyUser
getScore'
_
=
getNodesByNgramsOnlyUser
getOccByNgramsOnlySafe
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnlySafe
cId
ls
nt
ngs
=
do
printDebug
"getOccByNgramsOnlySafe"
(
cId
,
nt
,
length
ngs
)
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
cId
ls
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
pure
slow
...
...
@@ -209,7 +213,7 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
getNodesByNgramsOnlyUser
::
Corpus
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
getNodesByNgramsOnlyUser
::
Node
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
...
...
@@ -248,6 +252,39 @@ queryNgramsOnlyByNodeUser = [sql|
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
,
ngramsTypeId
nt
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
=
[
sql
|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.node2_id FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node1_id
WHERE nng.node2_id = ? -- DocId
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY ng.terms, nng.node2_id
|]
------------------------------------------------------------------------
...
...
src/Gargantext/Text/List.hs
View file @
2b0c0c9b
...
...
@@ -60,23 +60,21 @@ buildNgramsOthersList :: UserCorpusId -> (Text -> Text) -> NgramsType
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
pure
$
Map
.
fromList
[(
nt
,
[
mkNgramsElement
t
CandidateTerm
Nothing
(
mSetFromList
[]
)
|
(
t
,
_ns
)
<-
Map
.
toList
ngs
let
all'
=
Map
.
toList
ngs
pure
$
(
toElements
GraphTerm
$
take
10
all'
)
<>
(
toElements
CandidateTerm
$
drop
10
all'
)
where
toElements
nType
x
=
Map
.
fromList
[(
nt
,
[
mkNgramsElement
t
nType
Nothing
(
mSetFromList
[]
)
|
(
t
,
_ns
)
<-
x
]
)
]
-- TODO remove hard coded parameters
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
--printDebug "candidate" (length candidates)
let
termList
=
toTermList
((
isStopTerm
s
)
.
fst
)
candidates
--let termList = toTermList ((\_ -> False) . fst) candidates
--printDebug "termlist" (length termList)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment