Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8804c4e7
Commit
8804c4e7
authored
Apr 21, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLEAN] metrics
parent
80fbde18
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
56 deletions
+106
-56
List.hs
src/Gargantext/Database/Action/Flow/List.hs
+4
-2
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+65
-36
List.hs
src/Gargantext/Text/List.hs
+37
-18
No files found.
src/Gargantext/Database/Action/Flow/List.hs
View file @
8804c4e7
...
@@ -69,7 +69,9 @@ flowList_DbRepo lId ngs = do
...
@@ -69,7 +69,9 @@ flowList_DbRepo lId ngs = do
-- Inserting groups of ngrams
-- Inserting groups of ngrams
_r
<-
insert_Node_NodeNgrams_NodeNgrams
_r
<-
insert_Node_NodeNgrams_NodeNgrams
$
map
(
\
(
a
,
b
)
->
Node_NodeNgrams_NodeNgrams
lId
a
b
Nothing
)
toInsert
$
map
(
\
(
a
,
b
)
->
Node_NodeNgrams_NodeNgrams
lId
a
b
Nothing
)
toInsert
listInsert
lId
ngs
listInsert
lId
ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure
lId
pure
lId
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -88,6 +90,7 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
...
@@ -88,6 +90,7 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
(
NgramsElement
ngrams_terms'
_size
list_type
_occ
_root
_parent
_children
)
<-
elms
(
NgramsElement
ngrams_terms'
_size
list_type
_occ
_root
_parent
_children
)
<-
elms
]
]
toNodeNgramsW'
::
ListId
toNodeNgramsW'
::
ListId
->
[(
Text
,
[
NgramsType
])]
->
[(
Text
,
[
NgramsType
])]
->
[
NodeNgramsW
]
->
[
NodeNgramsW
]
...
@@ -102,8 +105,7 @@ listInsert :: FlowCmdM env err m
...
@@ -102,8 +105,7 @@ listInsert :: FlowCmdM env err m
->
Map
NgramsType
[
NgramsElement
]
->
Map
NgramsType
[
NgramsElement
]
->
m
()
->
m
()
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
listInsert
lId
ngs
=
mapM_
(
\
(
typeList
,
ngElmts
)
->
putListNgrams
lId
typeList
ngElmts
->
putListNgrams
lId
typeList
ngElmts
)
(
toList
ngs
)
)
$
toList
ngs
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
8804c4e7
...
@@ -46,7 +46,11 @@ import qualified Database.PostgreSQL.Simple as DPS
...
@@ -46,7 +46,11 @@ import qualified Database.PostgreSQL.Simple as DPS
-- discussed. Main purpose of this is offering
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
-- enriched data to better learn and improve that algo
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
::
Lang
->
Int
->
Int
->
Text
->
Text
ngramsGroup
l
_m
_n
=
Text
.
intercalate
" "
ngramsGroup
l
_m
_n
=
Text
.
intercalate
" "
.
map
(
stem
l
)
.
map
(
stem
l
)
-- . take n
-- . take n
...
@@ -61,17 +65,24 @@ sortTficf :: (Map Text (Double, Set Text))
...
@@ -61,17 +65,24 @@ sortTficf :: (Map Text (Double, Set Text))
sortTficf
=
List
.
sortOn
(
fst
.
snd
)
.
toList
sortTficf
=
List
.
sortOn
(
fst
.
snd
)
.
toList
getTficf'
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
getTficf
::
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
(
Text
->
Text
)
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
->
Cmd
err
(
Map
Text
(
Double
,
Set
Text
))
getTficf
'
u
m
nt
f
=
do
getTficf
u
m
nt
f
=
do
u'
<-
getNodesByNgramsUser
u
nt
u'
<-
getNodesByNgramsUser
u
nt
m'
<-
getNodesByNgramsMaster
u
m
m'
<-
getNodesByNgramsMaster
u
m
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
pure
$
toTficfData
(
countNodesByNgramsWith
f
u'
)
(
countNodesByNgramsWith
f
m'
)
(
countNodesByNgramsWith
f
m'
)
getTficfWith
::
UserCorpusId
->
MasterCorpusId
->
[
ListId
]
{-
->
NgramsType
->
Map
Text
(
Maybe
Text
)
getTficfWith :: UserCorpusId
-> MasterCorpusId
-> [ListId]
-> NgramsType
-> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m ls nt mtxt = do
getTficfWith u m ls nt mtxt = do
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
u' <- getNodesByNgramsOnlyUser u ls nt (Map.keys mtxt)
...
@@ -83,13 +94,14 @@ getTficfWith u m ls nt mtxt = do
...
@@ -83,13 +94,14 @@ getTficfWith u m ls nt mtxt = do
pure $ toTficfData (countNodesByNgramsWith f u')
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
(countNodesByNgramsWith f m')
-}
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Context
=
(
Double
,
Map
Text
(
Double
,
Set
Text
))
type
Supra
=
Context
type
Supra
=
Context
type
Infra
=
Context
type
Infra
=
Context
toTficfData
::
Infra
->
Supra
toTficfData
::
Infra
->
Supra
->
Map
Text
(
Double
,
Set
Text
)
->
Map
Text
(
Double
,
Set
Text
)
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
toTficfData
(
ti
,
mi
)
(
ts
,
ms
)
=
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
fromList
[
(
t
,
(
tficf
(
TficfInfra
(
Count
n
)(
Total
ti
))
...
@@ -129,7 +141,8 @@ getNodesByNgramsUser cId nt =
...
@@ -129,7 +141,8 @@ getNodesByNgramsUser cId nt =
<$>
selectNgramsByNodeUser
cId
nt
<$>
selectNgramsByNodeUser
cId
nt
where
where
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
selectNgramsByNodeUser
::
CorpusId
->
NgramsType
->
Cmd
err
[(
NodeId
,
Text
)]
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeUser
cId'
nt'
=
selectNgramsByNodeUser
cId'
nt'
=
runPGSQuery
queryNgramsByNodeUser
runPGSQuery
queryNgramsByNodeUser
...
@@ -202,8 +215,6 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
...
@@ -202,8 +215,6 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
|]
|]
-- just slower than getOccByNgramsOnlyFast
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow
::
NodeType
getOccByNgramsOnlySlow
::
NodeType
->
CorpusId
->
CorpusId
...
@@ -228,11 +239,14 @@ getOccByNgramsOnlySafe cId ls nt ngs = do
...
@@ -228,11 +239,14 @@ getOccByNgramsOnlySafe cId ls nt ngs = do
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
fast
<-
getOccByNgramsOnlyFast
cId
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
slow
<-
getOccByNgramsOnlySlow
NodeCorpus
cId
ls
nt
ngs
when
(
fast
/=
slow
)
$
when
(
fast
/=
slow
)
$
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
printDebug
"getOccByNgramsOnlySafe: difference"
(
diff
slow
fast
::
PatchMap
Text
(
Replace
(
Maybe
Int
)))
pure
slow
pure
slow
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
selectNgramsOccurrencesOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
tms
=
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser
...
@@ -262,8 +276,6 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
...
@@ -262,8 +276,6 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
GROUP BY nng.node2_id, ng.terms
|]
|]
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
WITH input_rows(terms) AS (?)
WITH input_rows(terms) AS (?)
...
@@ -280,8 +292,10 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
...
@@ -280,8 +292,10 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
|]
------------------------------------------------------------------------
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
NodeId
getNodesByNgramsOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
getNodesByNgramsOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
Map
.
unionsWith
(
<>
)
...
@@ -305,12 +319,16 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
...
@@ -305,12 +319,16 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
(
splitEvery
1000
ngs
)
(
splitEvery
1000
ngs
)
------------------------------------------------------------------------
------------------------------------------------------------------------
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByNodeUser
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByNodeUser
cId
ls
nt
tms
=
selectNgramsOnlyByNodeUser
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
cId
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
,
ngramsTypeId
nt
...
@@ -336,13 +354,16 @@ queryNgramsOnlyByNodeUser = [sql|
...
@@ -336,13 +354,16 @@ queryNgramsOnlyByNodeUser = [sql|
|]
|]
selectNgramsOnlyByNodeUser'
::
CorpusId
selectNgramsOnlyByNodeUser'
::
CorpusId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
Int
)]
->
Cmd
err
[(
Text
,
Int
)]
selectNgramsOnlyByNodeUser'
cId
ls
nt
tms
=
selectNgramsOnlyByNodeUser'
cId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
runPGSQuery
queryNgramsOnlyByNodeUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
cId
,
cId
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
,
ngramsTypeId
nt
...
@@ -365,8 +386,10 @@ queryNgramsOnlyByNodeUser' = [sql|
...
@@ -365,8 +386,10 @@ queryNgramsOnlyByNodeUser' = [sql|
|]
|]
getNgramsByDocOnlyUser
::
NodeId
getNgramsByDocOnlyUser
::
NodeId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
getNgramsByDocOnlyUser
cId
ls
nt
ngs
=
Map
.
unionsWith
(
<>
)
Map
.
unionsWith
(
<>
)
...
@@ -374,18 +397,23 @@ getNgramsByDocOnlyUser cId ls nt ngs =
...
@@ -374,18 +397,23 @@ getNgramsByDocOnlyUser cId ls nt ngs =
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
<$>
mapM
(
selectNgramsOnlyByDocUser
cId
ls
nt
)
(
splitEvery
1000
ngs
)
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
selectNgramsOnlyByDocUser
::
DocId
->
[
ListId
]
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
Text
,
NodeId
)]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
selectNgramsOnlyByDocUser
dId
ls
nt
tms
=
runPGSQuery
queryNgramsOnlyByDocUser
runPGSQuery
queryNgramsOnlyByDocUser
(
Values
fields
(
DPS
.
Only
<$>
tms
)
(
Values
fields
(
DPS
.
Only
<$>
tms
)
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
Values
[
QualifiedIdentifier
Nothing
"int4"
]
(
DPS
.
Only
<$>
(
map
(
\
(
NodeId
n
)
->
n
)
ls
))
,
dId
,
dId
,
ngramsTypeId
nt
,
ngramsTypeId
nt
)
)
where
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
::
DPS
.
Query
queryNgramsOnlyByDocUser
=
[
sql
|
queryNgramsOnlyByDocUser
=
[
sql
|
WITH input_rows(terms) AS (?),
WITH input_rows(terms) AS (?),
...
@@ -408,8 +436,11 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
...
@@ -408,8 +436,11 @@ getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>)
-- . takeWhile (\l -> List.length l > 3)
-- . takeWhile (\l -> List.length l > 3)
<$>
mapM
(
selectNgramsByNodeMaster
1000
ucId
mcId
)
[
0
,
500
..
10000
]
<$>
mapM
(
selectNgramsByNodeMaster
1000
ucId
mcId
)
[
0
,
500
..
10000
]
selectNgramsByNodeMaster
::
Int
selectNgramsByNodeMaster
::
Int
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Cmd
err
[(
NodeId
,
Text
)]
selectNgramsByNodeMaster
n
ucId
mcId
p
=
runPGSQuery
selectNgramsByNodeMaster
n
ucId
mcId
p
=
runPGSQuery
queryNgramsByNodeMaster'
queryNgramsByNodeMaster'
(
ucId
(
ucId
...
@@ -458,5 +489,3 @@ queryNgramsByNodeMaster' = [sql|
...
@@ -458,5 +489,3 @@ queryNgramsByNodeMaster' = [sql|
SELECT m.id, m.terms FROM nodesByNgramsMaster m
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
|]
src/Gargantext/Text/List.hs
View file @
8804c4e7
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mS
import
Gargantext.API.Ngrams.Tools
(
getCoocByNgrams'
,
Diagonal
(
..
))
import
Gargantext.API.Ngrams.Tools
(
getCoocByNgrams'
,
Diagonal
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
NodeId
)
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
NodeId
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
'
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getTficf
,
sortTficf
,
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Database.Admin.Utils
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -56,7 +56,12 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
...
@@ -56,7 +56,12 @@ data NgramsListBuilder = BuilderStepO { stemSize :: Int
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
-- | TODO improve grouping functions of Authors, Sources, Institutes..
-- | TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
buildNgramsLists
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
...
@@ -64,7 +69,9 @@ buildNgramsLists l n m s uCid mCid = do
...
@@ -64,7 +69,9 @@ buildNgramsLists l n m s uCid mCid = do
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
buildNgramsOthersList
::
UserCorpusId
->
(
Text
->
Text
)
->
NgramsType
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsOthersList
uCid
groupIt
nt
=
do
buildNgramsOthersList
uCid
groupIt
nt
=
do
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
ngs
<-
groupNodesByNgramsWith
groupIt
<$>
getNodesByNgramsUser
uCid
nt
...
@@ -83,12 +90,14 @@ buildNgramsOthersList uCid groupIt nt = do
...
@@ -83,12 +90,14 @@ buildNgramsOthersList uCid groupIt nt = do
)
)
]
]
--
{-
{-
buildNgramsTermsList' :: UserCorpusId
buildNgramsTermsList' :: UserCorpusId
-> (Text -> Text)
-> (Text -> Text)
->
((
Text
,
(
Set
Text
,
Set
NodeId
))
->
Bool
)
->
Int
->
Int
-> ((Text, (Set Text, Set NodeId)) -> Bool)
-> Int
-> Int
-> Cmd err (Map NgramsType [NgramsElement])
-> Cmd err (Map NgramsType [NgramsElement])
--}
buildNgramsTermsList' uCid groupIt stop gls is = do
buildNgramsTermsList' uCid groupIt stop gls is = do
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
...
@@ -117,21 +126,31 @@ buildNgramsTermsList' uCid groupIt stop gls is = do
...
@@ -117,21 +126,31 @@ buildNgramsTermsList' uCid groupIt stop gls is = do
<> map (\t -> (GraphTerm, toList' t)) m
<> map (\t -> (GraphTerm, toList' t)) m
pure $ Map.fromList [(NgramsTerms, ngs')]
pure $ Map.fromList [(NgramsTerms, ngs')]
-}
buildNgramsTermsList
::
Lang
buildNgramsTermsList
::
Lang
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Int
->
Int
->
StopSize
->
UserCorpusId
->
MasterCorpusId
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
candidates
<-
sortTficf
<$>
getTficf'
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
candidates
<-
sortTficf
<$>
getTficf
uCid
mCid
NgramsTerms
(
ngramsGroup
l
n
m
)
let
let
candidatesSize
=
2000
candidatesSize
=
2000
a
=
10
a
=
10
b
=
10
b
=
10
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesHead
=
List
.
take
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
candidatesTail
=
List
.
drop
candidatesSize
candidates
termList
=
(
toTermList
a
b
((
isStopTerm
s
)
.
fst
)
candidatesHead
)
termList
=
(
toTermList
a
b
((
isStopTerm
s
)
.
fst
)
candidatesHead
)
<>
(
map
(
toList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
<>
(
map
(
toList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTail
)
let
ngs
=
List
.
concat
$
map
toNgramsElement
termList
ngs
=
List
.
concat
$
map
toNgramsElement
termList
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
pure
$
Map
.
fromList
[(
NgramsTerms
,
ngs
)]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment