Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8804c4e7
Commit
8804c4e7
authored
Apr 21, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLEAN] metrics
parent
80fbde18
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
56 deletions
+106
-56
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+4
-2
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+65
-36
List.hs
src/Gargantext/Text/List.hs
+37
-18
No files found.
src/Gargantext/Database/Action/Flow/List.hs
View file @
8804c4e7
...
...
@@ -69,7 +69,9 @@ flowList_DbRepo lId ngs = do
-- Inserting groups of ngrams
_r
<-
insert_Node_NodeNgrams_NodeNgrams
$
map
(
\
(
a
,
b
)
->
Node_NodeNgrams_NodeNgrams
lId
a
b
Nothing
)
toInsert
listInsert
lId
ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure
lId
------------------------------------------------------------------------
...
...
@@ -88,6 +90,7 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
(
NgramsElement
ngrams_terms'
_size
list_type
_occ
_root
_parent
_children
)
<-
elms
]
toNodeNgramsW'
::
ListId
->
[(
Text
,
[
NgramsType
])]
->
[
NodeNgramsW
]
...
...
@@ -102,8 +105,7 @@ listInsert :: FlowCmdM env err m
->
Map
NgramsType
[
NgramsElement
]
->
m
()
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
->
putListNgrams
lId
typeList
ngElmts
)
(
toList
ngs
)
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
8804c4e7
...
...
@@ -46,7 +46,11 @@ import qualified Database.PostgreSQL.Simple as DPS
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
_m
_n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
-- . take n
...
...
@@ -61,17 +65,24 @@ sortTficf :: (Map Text (Double, Set Text))
sortTficf
=
List
.
sortOn
(
fst
.
snd
)
.
toList
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
'
u
m
nt
f
=
do
getTficf
u
m
nt
f
=
do
u'
<-
getNodesByNgramsUser
u
nt
m'
<-
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
getTficfWith
::
UserCorpusId
->
MasterCorpusId
->
[
ListId
]
->
NgramsType
->
Map
Text
(
Maybe
Text
)
{-
getTficfWith :: UserCorpusId
-> MasterCorpusId
-> [ListId]
-> NgramsType
-> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
...
...
@@ -83,13 +94,14 @@ getTficfWith u m ls nt mtxt = do
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
-}
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
...
...
@@ -129,7 +141,8 @@ getNodesByNgramsUser cId nt =
<$>
selectNgramsByNodeUser
cId
nt
where
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId'
nt'
=
runPGSQuery
queryNgramsByNodeUser
...
...
@@ -202,8 +215,6 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
|]
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow
::
NodeType
->
CorpusId
...
...
@@ -228,11 +239,14 @@ getOccByNgramsOnlySafe cId ls nt ngs = do
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
pure
slow
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser
...
...
@@ -262,8 +276,6 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
WITH input_rows(terms) AS (?)
...
...
@@ -280,8 +292,10 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
getNodesByNgramsOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
...
...
@@ -305,12 +319,16 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
(
splitEvery
1000
ngs
)
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByNodeUser
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
...
...
@@ -336,13 +354,16 @@ queryNgramsOnlyByNodeUser = [sql|
|]
selectNgramsOnlyByNodeUser'
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByNodeUser'
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOnlyByNodeUser'
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
...
...
@@ -365,8 +386,10 @@ queryNgramsOnlyByNodeUser' = [sql|
|]
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
...
...
@@ -374,18 +397,23 @@ getNgramsByDocOnlyUser cId ls nt ngs =
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
,
ngramsTypeId
nt
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
=
[
sql
|
WITH input_rows(terms) AS (?),
...
...
@@ -408,8 +436,11 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
-- . takeWhile (\l -> List.length l > 3)
<$>
mapM
(
selectNgramsByNodeMaster
1000
ucId
mcId
)
[
0
,
500
..
10000
]
selectNgramsByNodeMaster
::
Int
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
::
Int
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
n
ucId
mcId
p
=
runPGSQuery
queryNgramsByNodeMaster'
(
ucId
...
...
@@ -458,5 +489,3 @@ queryNgramsByNodeMaster' = [sql|
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
src/Gargantext/Text/List.hs
View file @
8804c4e7
...
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
import
Gargantext.API.Ngrams.Tools
(
getCoocByNgrams'
,
Diagonal
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
NodeId
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
'
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -56,7 +56,12 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
...
...
@@ -64,7 +69,9 @@ buildNgramsLists l n m s uCid mCid = do
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
...
...
@@ -83,12 +90,14 @@ buildNgramsOthersList uCid groupIt nt = do
)
]
--
{-
{-
buildNgramsTermsList' :: UserCorpusId
-> (Text -> Text)
->
((
Text
,
(
Set
Text
,
Set
NodeId
))
->
Bool
)
->
Int
->
Int
-> ((Text, (Set Text, Set NodeId)) -> Bool)
-> Int
-> Int
-> Cmd err (Map NgramsType [NgramsElement])
--}
buildNgramsTermsList' uCid groupIt stop gls is = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
...
...
@@ -117,21 +126,31 @@ buildNgramsTermsList' uCid groupIt stop gls is = do
<> map (\t -> (GraphTerm, toList' t)) m
pure $ Map.fromList [(NgramsTerms, ngs')]
-}
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
candidates
<-
sortTficf
<$>
getTficf
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
let
candidatesSize
=
2000
a
=
10
b
=
10
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
termList
=
(
toTermList
a
b
((
isStopTerm
s
)
.
fst
)
candidatesHead
)
<>
(
map
(
toList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment