Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eeeec827
Commit
eeeec827
authored
Sep 17, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] ngrams size (WIP)
parent
80c2bb9e
Pipeline
#1073
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
31 additions
and
18 deletions
+31
-18
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+3
-5
Text.hs
src/Gargantext/Core/Text.hs
+6
-0
List.hs
src/Gargantext/Core/Text/List.hs
+13
-5
Hetero.purs
src/Gargantext/Core/Text/Metrics/Hetero.purs
+3
-3
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-0
Matrice.hs
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
+4
-4
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams.hs
View file @
eeeec827
...
@@ -115,7 +115,7 @@ import Data.Set (Set)
...
@@ -115,7 +115,7 @@ import Data.Set (Set)
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
Data.Swagger
hiding
(
version
,
patch
)
import
Data.Swagger
hiding
(
version
,
patch
)
import
Data.Text
(
Text
,
count
,
isInfixOf
,
unpack
)
import
Data.Text
(
Text
,
isInfixOf
,
unpack
)
import
Data.Text.Lazy.IO
as
DTL
import
Data.Text.Lazy.IO
as
DTL
import
Data.Validity
import
Data.Validity
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
...
@@ -144,6 +144,7 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
...
@@ -144,6 +144,7 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
)
import
qualified
Gargantext.Database.Query.Table.Ngrams
as
TableNgrams
import
qualified
Gargantext.Database.Query.Table.Ngrams
as
TableNgrams
import
qualified
Gargantext.Core.Text
as
GCT
------------------------------------------------------------------------
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
--data FacetFormat = Table | Chart
...
@@ -262,10 +263,7 @@ mkNgramsElement :: NgramsTerm
...
@@ -262,10 +263,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
->
NgramsElement
mkNgramsElement
ngrams
list
rp
children
=
mkNgramsElement
ngrams
list
rp
children
=
NgramsElement
ngrams
size
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
NgramsElement
ngrams
(
GCT
.
size
ngrams
)
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
where
-- TODO review
size
=
1
+
count
" "
ngrams
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
newNgramsElement
mayList
ngrams
=
...
...
src/Gargantext/Core/Text.hs
View file @
eeeec827
...
@@ -127,3 +127,9 @@ termTests = "It is hard to detect important articles in a specific context. Info
...
@@ -127,3 +127,9 @@ termTests = "It is hard to detect important articles in a specific context. Info
-- group ngrams
-- group ngrams
--ocs = occ $ ws
--ocs = occ $ ws
-- | Ngrams size
size
::
Text
->
Int
size
t
=
1
+
DT
.
count
" "
t
src/Gargantext/Core/Text/List.hs
View file @
eeeec827
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Learn
(
Model
(
..
))
import
Gargantext.Core.Text.List.Learn
(
Model
(
..
))
-- import Gargantext.Core.Text.Metrics (takeScored)
-- import Gargantext.Core.Text.Metrics (takeScored)
import
qualified
Data.Char
as
Char
import
qualified
Data.Char
as
Char
...
@@ -63,7 +64,8 @@ buildNgramsLists :: Lang
...
@@ -63,7 +64,8 @@ buildNgramsLists :: Lang
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
@@ -76,7 +78,9 @@ buildNgramsOthersList uCid groupIt nt = do
...
@@ -76,7 +78,9 @@ buildNgramsOthersList uCid groupIt nt = do
let
let
listSize
=
9
listSize
=
9
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
graphTerms
=
List
.
take
listSize
all'
graphTerms
=
List
.
take
listSize
all'
candiTerms
=
List
.
drop
listSize
all'
candiTerms
=
List
.
drop
listSize
all'
...
@@ -104,12 +108,16 @@ buildNgramsTermsList l n m s uCid mCid = do
...
@@ -104,12 +108,16 @@ buildNgramsTermsList l n m s uCid mCid = do
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let
let
listSize
=
400
::
Double
(
candidatesHead
,
candidatesTail0
)
=
List
.
splitAt
3
candidates
(
candidatesHead
,
candidatesTail0
)
=
List
.
splitAt
3
candidates
(
candidatesMap
,
candidatesTailFinal
)
=
List
.
splitAt
400
candidatesTail0
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidatesTail0
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSize
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSize
)
multi
termList
=
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesHead
)
termList
=
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesMap
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
(
monoHead
<>
multiHead
)
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTailFinal
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
(
monoTail
<>
multiTail
)
)
ngs
=
List
.
concat
ngs
=
List
.
concat
$
map
toNgramsElement
$
map
toNgramsElement
...
...
src/Gargantext/Core/Text/Metrics/Hetero.purs
View file @
eeeec827
...
@@ -52,12 +52,12 @@ dicoStruct dict_occ = do
...
@@ -52,12 +52,12 @@ dicoStruct dict_occ = do
heterogeinity :: [Char] -> IO Integer
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
heterogeinity string = do
let dict_occ = occurrences $ cleanText string
let dict_occ = occurrences $ cleanText string
let keys_size = toInteger $ length $ M.keys dict_occ
let keys_size = toInteger $ length $ M.keys dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
let total_occ = sum $ Prelude.map (\(x, y) -> y) $ M.toList dict_occ
return $ div total_occ (fromIntegral keys_size)
return $ div total_occ (fromIntegral keys_size)
--computeHeterogeinity
--computeHeterogeinity
-- :: Fractional t =>
-- :: Fractional t =>
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
eeeec827
...
@@ -57,6 +57,7 @@ import qualified Data.List as List
...
@@ -57,6 +57,7 @@ import qualified Data.List as List
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
data
TermType
lang
data
TermType
lang
=
Mono
{
_tt_lang
::
!
lang
}
=
Mono
{
_tt_lang
::
!
lang
}
|
Multi
{
_tt_lang
::
!
lang
}
|
Multi
{
_tt_lang
::
!
lang
}
...
...
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
View file @
eeeec827
...
@@ -466,19 +466,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
...
@@ -466,19 +466,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
-- | Inclusion (i) = Gen(i)+Spec(i)
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pV
mat
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pV
mat
)
-- | Genericity score = Gen(i)- Spec(i)
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
mat
=
zipWith
(
+
)
(
pH
mat
)
(
pH
mat
)
specificityGenericity
mat
=
zipWith
(
+
)
(
pH
mat
)
(
pH
mat
)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
cardN
::
Exp
Double
cardN
::
Exp
Double
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
eeeec827
...
@@ -236,7 +236,7 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
...
@@ -236,7 +236,7 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
|]
------------------------------------------------------------------------
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
Node
Id
getNodesByNgramsOnlyUser
::
Corpus
Id
->
[
ListId
]
->
[
ListId
]
->
NgramsType
->
NgramsType
->
[
Text
]
->
[
Text
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment