Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eeeec827
Commit
eeeec827
authored
Sep 17, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] ngrams size (WIP)
parent
80c2bb9e
Pipeline
#1073
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
31 additions
and
18 deletions
+31
-18
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+3
-5
Text.hs
src/Gargantext/Core/Text.hs
+6
-0
List.hs
src/Gargantext/Core/Text/List.hs
+13
-5
Hetero.purs
src/Gargantext/Core/Text/Metrics/Hetero.purs
+3
-3
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-0
Matrice.hs
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
+4
-4
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams.hs
View file @
eeeec827
...
...
@@ -115,7 +115,7 @@ import Data.Set (Set)
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
Set
import
Data.Swagger
hiding
(
version
,
patch
)
import
Data.Text
(
Text
,
count
,
isInfixOf
,
unpack
)
import
Data.Text
(
Text
,
isInfixOf
,
unpack
)
import
Data.Text.Lazy.IO
as
DTL
import
Data.Validity
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
...
...
@@ -144,6 +144,7 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
)
import
qualified
Gargantext.Database.Query.Table.Ngrams
as
TableNgrams
import
qualified
Gargantext.Core.Text
as
GCT
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
...
...
@@ -262,10 +263,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
mkNgramsElement
ngrams
list
rp
children
=
NgramsElement
ngrams
size
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
where
-- TODO review
size
=
1
+
count
" "
ngrams
NgramsElement
ngrams
(
GCT
.
size
ngrams
)
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
...
...
src/Gargantext/Core/Text.hs
View file @
eeeec827
...
...
@@ -127,3 +127,9 @@ termTests = "It is hard to detect important articles in a specific context. Info
-- group ngrams
--ocs = occ $ ws
-- | Ngrams size: the number of whitespace-separated words in the term.
--
-- Counting words (rather than @1 +@ the number of space characters) keeps
-- the result correct when the text contains repeated, leading or trailing
-- whitespace, and yields 0 for an empty text. For terms whose words are
-- separated by exactly one space the result is the same as before.
size :: Text -> Int
size t = length (DT.words t)
src/Gargantext/Core/Text/List.hs
View file @
eeeec827
...
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Learn
(
Model
(
..
))
-- import Gargantext.Core.Text.Metrics (takeScored)
import
qualified
Data.Char
as
Char
...
...
@@ -63,7 +64,8 @@ buildNgramsLists :: Lang
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
buildNgramsLists
l
n
m
s
uCid
mCid
=
do
ngTerms
<-
buildNgramsTermsList
l
n
m
s
uCid
mCid
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
othersTerms
<-
mapM
(
buildNgramsOthersList
uCid
identity
)
[
Authors
,
Sources
,
Institutes
]
pure
$
Map
.
unions
$
othersTerms
<>
[
ngTerms
]
...
...
@@ -76,7 +78,9 @@ buildNgramsOthersList uCid groupIt nt = do
let
listSize
=
9
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
graphTerms
=
List
.
take
listSize
all'
candiTerms
=
List
.
drop
listSize
all'
...
...
@@ -104,12 +108,16 @@ buildNgramsTermsList l n m s uCid mCid = do
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let
listSize
=
400
::
Double
(
candidatesHead
,
candidatesTail0
)
=
List
.
splitAt
3
candidates
(
candidatesMap
,
candidatesTailFinal
)
=
List
.
splitAt
400
candidatesTail0
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidatesTail0
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSize
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSize
)
multi
termList
=
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesMap
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTailFinal
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
(
monoHead
<>
multiHead
)
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
(
monoTail
<>
multiTail
)
)
ngs
=
List
.
concat
$
map
toNgramsElement
...
...
src/Gargantext/Core/Text/Metrics/Hetero.purs
View file @
eeeec827
...
...
@@ -52,12 +52,12 @@ dicoStruct dict_occ = do
-- | Integer average of occurrences per distinct key of the occurrence
-- dictionary built from the cleaned input string
-- (@total occurrences `div` number of keys@).
--
-- Returns 0 when the dictionary is empty instead of raising a
-- divide-by-zero exception.
-- NOTE(review): 'occurrences' and 'cleanText' are defined elsewhere; this
-- assumes the dictionary maps each key to its occurrence count — confirm.
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
  let dict_occ  = occurrences $ cleanText string
      keys_size = toInteger $ M.size dict_occ
      total_occ = sum $ M.elems dict_occ
  return $ if keys_size == 0
             then 0  -- no keys: avoid `div` by zero
             else div total_occ (fromIntegral keys_size)
--computeHeterogeinity
-- :: Fractional t =>
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
eeeec827
...
...
@@ -57,6 +57,7 @@ import qualified Data.List as List
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
data
TermType
lang
=
Mono
{
_tt_lang
::
!
lang
}
|
Multi
{
_tt_lang
::
!
lang
}
...
...
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
View file @
eeeec827
...
...
@@ -466,19 +466,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
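-- | Inclusion (i) = Gen(i) + Spec(i). NOTE(review): the definition below adds pV to itself (i.e. 2 * Gen(i)) rather than pV + pH; confirm which is intended.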
inclusionExclusion
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
inclusionExclusion
mat
=
zipWith
(
+
)
(
pV
mat
)
(
pV
mat
)
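-- | Genericity score = Gen(i) - Spec(i). NOTE(review): the definition below adds pH to itself (i.e. 2 * Spec(i)) rather than computing pV - pH; confirm which is intended.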
specificityGenericity
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
specificityGenericity
mat
=
zipWith
(
+
)
(
pH
mat
)
(
pH
mat
)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pV
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ij
mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
pH
::
Acc
(
Matrix
Double
)
->
Acc
(
Vector
Double
)
pH
mat
=
map
(
\
x
->
(
x
-
1
)
/
(
cardN
-
1
))
$
sum
$
p_ji
mat
cardN
::
Exp
Double
cardN
=
constant
(
P
.
fromIntegral
(
dim
m
)
::
Double
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
eeeec827
...
...
@@ -236,7 +236,7 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
Node
Id
getNodesByNgramsOnlyUser
::
Corpus
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment