Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
7fd045e8
Commit
7fd045e8
authored
Sep 21, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] SpeGen scores (WIP)
parent
1e877937
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
25 deletions
+38
-25
List.hs
src/Gargantext/Core/Text/List.hs
+37
-24
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-1
No files found.
src/Gargantext/Core/Text/List.hs
View file @
7fd045e8
...
...
@@ -28,9 +28,10 @@ import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, NgramsTerm(..), Ro
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
,
Ordering
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
ngramsGroup
,
getNodesByNgramsUser
,
groupNodesByNgramsWith
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Core.Text.Metrics.TFICF
(
sortTficf
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
)
import
Gargantext.Database.Query.Table.Node.Error
(
HasNodeError
())
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
...
...
@@ -40,25 +41,26 @@ import Gargantext.Core.Text.List.Learn (Model(..))
-- import Gargantext.Core.Text.Metrics (takeScored)
data
NgramsListBuilder
=
BuilderStepO
{
stemSize
::
Int
,
stemX
::
Int
,
stopSize
::
Int
data
NgramsListBuilder
=
BuilderStepO
{
stemSize
::
!
Int
,
stemX
::
!
Int
,
stopSize
::
!
Int
}
|
BuilderStep1
{
withModel
::
Model
}
|
BuilderStepN
{
withModel
::
Model
}
|
Tficf
{
nlb_lang
::
Lang
,
nlb_group1
::
Int
,
nlb_group2
::
Int
,
nlb_stopSize
::
StopSize
,
nlb_userCorpusId
::
UserCorpusId
,
nlb_masterCorpusId
::
MasterCorpusId
|
BuilderStep1
{
withModel
::
!
Model
}
|
BuilderStepN
{
withModel
::
!
Model
}
|
Tficf
{
nlb_lang
::
!
Lang
,
nlb_group1
::
!
Int
,
nlb_group2
::
!
Int
,
nlb_stopSize
::
!
StopSize
,
nlb_userCorpusId
::
!
UserCorpusId
,
nlb_masterCorpusId
::
!
MasterCorpusId
}
data
StopSize
=
StopSize
{
unStopSize
::
Int
}
data
StopSize
=
StopSize
{
unStopSize
::
!
Int
}
-- | TODO: improve the grouping functions for Authors, Sources, Institutes, etc.
buildNgramsLists
::
Lang
buildNgramsLists
::
HasNodeError
err
=>
Lang
->
Int
->
Int
->
StopSize
...
...
@@ -98,7 +100,9 @@ buildNgramsOthersList uCid groupIt nt = do
]
)]
buildNgramsTermsList
::
Lang
-- TODO use ListIds
buildNgramsTermsList
::
HasNodeError
err
=>
Lang
->
Int
->
Int
->
StopSize
...
...
@@ -108,8 +112,7 @@ buildNgramsTermsList :: Lang
buildNgramsTermsList
l
n
m
s
uCid
mCid
=
do
-- Computing global speGen score
-- TODO sort is not needed, just take the score
allTerms
<-
sortTficf
Up
<$>
getTficf
uCid
mCid
NgramsTerms
allTerms
<-
Map
.
toList
<$>
getTficf
uCid
mCid
NgramsTerms
-- printDebug "head candidates" (List.take 10 $ allTerms)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ allTerms)
...
...
@@ -147,8 +150,18 @@ buildNgramsTermsList l n m s uCid mCid = do
Set
.
empty
(
groupedMonoHead
<>
groupedMultHead
)
-- TODO: remove this (and the HasNodeError constraint that it requires)
userListId
<-
defaultList
uCid
masterListId
<-
defaultList
mCid
mapTextDocIds
<-
getNodesByNgramsOnlyUser
uCid
[
userListId
,
masterListId
]
NgramsTerms
selectedTerms
-- groups Set NodeId
-- compute cooccurrences
-- compute scores
-- sort / filter
let
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidateTerms
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSizeGlobal
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSizeGlobal
)
multi
...
...
@@ -177,12 +190,12 @@ type Group = Lang -> Int -> Int -> Text -> Text
type
Stem
=
Text
type
Label
=
Text
data
GroupedText
score
=
GroupedText
{
_gt_listType
::
Maybe
ListType
,
_gt_label
::
Label
,
_gt_score
::
score
,
_gt_group
::
Set
Text
,
_gt_size
::
Int
,
_gt_stem
::
Stem
GroupedText
{
_gt_listType
::
!
(
Maybe
ListType
)
,
_gt_label
::
!
Label
,
_gt_score
::
!
score
,
_gt_group
::
!
(
Set
Text
)
,
_gt_size
::
!
Int
,
_gt_stem
::
!
Stem
}
instance
(
Eq
a
)
=>
Eq
(
GroupedText
a
)
where
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
7fd045e8
...
...
@@ -330,7 +330,7 @@ queryNgramsOnlyByNodeUser' = [sql|
|]
getNgramsByDocOnlyUser
::
Node
Id
getNgramsByDocOnlyUser
::
Doc
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment