Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8804c4e7
Commit
8804c4e7
authored
Apr 21, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLEAN] metrics
parent
80fbde18
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
56 deletions
+106
-56
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+4
-2
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+65
-36
List.hs
src/Gargantext/Text/List.hs
+37
-18
No files found.
src/Gargantext/Database/Action/Flow/List.hs
View file @
8804c4e7
...
...
@@ -69,7 +69,9 @@ flowList_DbRepo lId ngs = do
-- Inserting groups of ngrams
_r
<-
insert_Node_NodeNgrams_NodeNgrams
$
map
(
\
(
a
,
b
)
->
Node_NodeNgrams_NodeNgrams
lId
a
b
Nothing
)
toInsert
listInsert
lId
ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure
lId
------------------------------------------------------------------------
...
...
@@ -88,6 +90,7 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
(
NgramsElement
ngrams_terms'
_size
list_type
_occ
_root
_parent
_children
)
<-
elms
]
toNodeNgramsW'
::
ListId
->
[(
Text
,
[
NgramsType
])]
->
[
NodeNgramsW
]
...
...
@@ -102,8 +105,7 @@ listInsert :: FlowCmdM env err m
->
Map
NgramsType
[
NgramsElement
]
->
m
()
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
)
$
toList
ngs
->
putListNgrams
lId
typeList
ngElmts
)
(
toList
ngs
)
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
8804c4e7
...
...
@@ -46,7 +46,11 @@ import qualified Database.PostgreSQL.Simple as DPS
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
_m
_n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
-- . take n
...
...
@@ -61,18 +65,25 @@ sortTficf :: (Map Text (Double, Set Text))
sortTficf
=
List
.
sortOn
(
fst
.
snd
)
.
toList
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
'
u
m
nt
f
=
do
getTficf
u
m
nt
f
=
do
u'
<-
getNodesByNgramsUser
u
nt
m'
<-
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
getTficfWith
::
UserCorpusId
->
MasterCorpusId
->
[
ListId
]
->
NgramsType
->
Map
Text
(
Maybe
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
{-
getTficfWith :: UserCorpusId
-> MasterCorpusId
-> [ListId]
-> NgramsType
-> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m
...
...
@@ -83,13 +94,14 @@ getTficfWith u m ls nt mtxt = do
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
-}
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
...
...
@@ -129,7 +141,8 @@ getNodesByNgramsUser cId nt =
<$>
selectNgramsByNodeUser
cId
nt
where
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId'
nt'
=
runPGSQuery
queryNgramsByNodeUser
...
...
@@ -172,10 +185,10 @@ getOccByNgramsOnlyFast' :: CorpusId
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
trace
(
show
(
cId
,
lId
))
$
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
run
::
CorpusId
->
ListId
->
NgramsType
...
...
@@ -202,8 +215,6 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
|]
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow
::
NodeType
->
CorpusId
...
...
@@ -228,12 +239,15 @@ getOccByNgramsOnlySafe cId ls nt ngs = do
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
pure
slow
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
...
...
@@ -262,8 +276,6 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
WITH input_rows(terms) AS (?)
...
...
@@ -280,8 +292,10 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
getNodesByNgramsOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
...
...
@@ -305,12 +319,16 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
(
splitEvery
1000
ngs
)
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByNodeUser
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
...
...
@@ -336,13 +354,16 @@ queryNgramsOnlyByNodeUser = [sql|
|]
selectNgramsOnlyByNodeUser'
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOnlyByNodeUser'
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOnlyByNodeUser'
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
...
...
@@ -365,27 +386,34 @@ queryNgramsOnlyByNodeUser' = [sql|
|]
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
.
map
(
fromListWith
(
<>
)
.
map
(
second
Set
.
singleton
))
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
,
ngramsTypeId
nt
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
=
[
sql
|
WITH input_rows(terms) AS (?),
...
...
@@ -408,8 +436,11 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
-- . takeWhile (\l -> List.length l > 3)
<$>
mapM
(
selectNgramsByNodeMaster
1000
ucId
mcId
)
[
0
,
500
..
10000
]
selectNgramsByNodeMaster
::
Int
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
::
Int
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
n
ucId
mcId
p
=
runPGSQuery
queryNgramsByNodeMaster'
(
ucId
...
...
@@ -458,5 +489,3 @@ queryNgramsByNodeMaster' = [sql|
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
src/Gargantext/Text/List.hs
View file @
8804c4e7
...
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
import
Gargantext.API.Ngrams.Tools
(
getCoocByNgrams'
,
Diagonal
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
NodeId
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
'
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -33,8 +33,8 @@ import Gargantext.Text.List.Learn (Model(..))
import
Gargantext.Text.Metrics
(
takeScored
)
import
qualified
Data.Char
as
Char
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
...
...
@@ -44,11 +44,11 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
}
|
BuilderStep1
{
withModel
::
Model
}
|
BuilderStepN
{
withModel
::
Model
}
|
Tficf
{
nlb_lang
::
Lang
,
nlb_group1
::
Int
,
nlb_group2
::
Int
,
nlb_stopSize
::
StopSize
,
nlb_userCorpusId
::
UserCorpusId
|
Tficf
{
nlb_lang
::
Lang
,
nlb_group1
::
Int
,
nlb_group2
::
Int
,
nlb_stopSize
::
StopSize
,
nlb_userCorpusId
::
UserCorpusId
,
nlb_masterCorpusId
::
MasterCorpusId
}
...
...
@@ -56,7 +56,12 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
...
...
@@ -64,7 +69,9 @@ buildNgramsLists l n m s uCid mCid = do
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
...
...
@@ -83,12 +90,14 @@ buildNgramsOthersList uCid groupIt nt = do
)
]
--
{-
{-
buildNgramsTermsList' :: UserCorpusId
-> (Text -> Text)
->
((
Text
,
(
Set
Text
,
Set
NodeId
))
->
Bool
)
->
Int
->
Int
-> ((Text, (Set Text, Set NodeId)) -> Bool)
-> Int
-> Int
-> Cmd err (Map NgramsType [NgramsElement])
--}
buildNgramsTermsList' uCid groupIt stop gls is = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
...
...
@@ -117,21 +126,31 @@ buildNgramsTermsList' uCid groupIt stop gls is = do
<> map (\t -> (GraphTerm, toList' t)) m
pure $ Map.fromList [(NgramsTerms, ngs')]
-}
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
candidates
<-
sortTficf
<$>
getTficf
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
let
candidatesSize
=
2000
a
=
10
b
=
10
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
termList
=
(
toTermList
a
b
((
isStopTerm
s
)
.
fst
)
candidatesHead
)
<>
(
map
(
toList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
@@ -167,7 +186,7 @@ toTermList a b stop ns = -- trace ("computing toTermList") $
where
xs
=
take
a
ns
ta
=
drop
a
ns
ys
=
take
b
ta
zs
=
drop
b
ta
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment