Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eeeec827
Commit
eeeec827
authored
Sep 17, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[TextFlow] ngrams size (WIP)
parent
80c2bb9e
Pipeline
#1073
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
31 additions
and
18 deletions
+31
-18
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+3
-5
Text.hs
src/Gargantext/Core/Text.hs
+6
-0
List.hs
src/Gargantext/Core/Text/List.hs
+13
-5
Hetero.purs
src/Gargantext/Core/Text/Metrics/Hetero.purs
+3
-3
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-0
Matrice.hs
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
+4
-4
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+1
-1
No files found.
src/Gargantext/API/Ngrams.hs
View file @
eeeec827
...
...
@@ -115,7 +115,7 @@ import Data.Set (Set)
import
qualified
Data.Set
as
S
import
qualified
Data.Set
as
Set
import
Data.Swagger
hiding
(
version
,
patch
)
import
Data.Text
(
Text
,
count
,
isInfixOf
,
unpack
)
import
Data.Text
(
Text
,
isInfixOf
,
unpack
)
import
Data.Text.Lazy.IO
as
DTL
import
Data.Validity
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
...
...
@@ -144,6 +144,7 @@ import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import
Gargantext.Database.Admin.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
)
import
qualified
Gargantext.Database.Query.Table.Ngrams
as
TableNgrams
import
qualified
Gargantext.Core.Text
as
GCT
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
...
...
@@ -262,10 +263,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
-- Builds an 'NgramsElement' for the term @ngrams@ placed in @list@.
-- The root and parent are read from the optional @rp@ (via '_rp_root' and
-- '_rp_parent'), and @children@ is passed through unchanged.
-- The size field is delegated to 'GCT.size' rather than being recomputed
-- locally, so this module and "Gargantext.Core.Text" agree on what an
-- ngram size is.
-- NOTE(review): the literal 1 is the fourth constructor field; presumably an
-- initial occurrence count -- confirm against the 'NgramsElement' declaration.
mkNgramsElement ngrams list rp children =
  NgramsElement ngrams (GCT.size ngrams) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
...
...
src/Gargantext/Core/Text.hs
View file @
eeeec827
...
...
@@ -127,3 +127,9 @@ termTests = "It is hard to detect important articles in a specific context. Info
-- group ngrams
--ocs = occ $ ws
-- | Ngrams size: one more than the number of @" "@ occurrences in the term.
--
-- A term without any space therefore has size 1 (this includes the empty
-- text), and every additional space adds one.
-- NOTE(review): this presumably expects terms whose words are separated by
-- exactly one space; consecutive spaces are each counted -- confirm callers
-- normalise their input.
size :: Text -> Int
size = (1 +) . DT.count " "
src/Gargantext/Core/Text/List.hs
View file @
eeeec827
...
...
@@ -27,6 +27,7 @@ import Gargantext.Core.Text.Metrics.TFICF (sortTficf)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text.List.Learn
(
Model
(
..
))
-- import Gargantext.Core.Text.Metrics (takeScored)
import
qualified
Data.Char
as
Char
...
...
@@ -63,7 +64,8 @@ buildNgramsLists :: Lang
->
Cmd
err
(
Map
NgramsType
[
NgramsElement
])
-- Builds the term list with 'buildNgramsTermsList' and one list per other
-- ngrams type (Authors, Sources, Institutes) with 'buildNgramsOthersList',
-- then merges all the resulting maps.
buildNgramsLists l n m s uCid mCid = do
  ngTerms     <- buildNgramsTermsList l n m s uCid mCid
  -- Bound exactly once: binding it a second time would run the same
  -- 'buildNgramsOthersList' actions again and shadow the first result.
  othersTerms <- mapM (buildNgramsOthersList uCid identity) [Authors, Sources, Institutes]
  -- 'othersTerms' comes first in the list handed to 'Map.unions', so on a
  -- duplicate key its entry is kept in preference to the one in 'ngTerms'.
  pure $ Map.unions $ othersTerms <> [ngTerms]
...
...
@@ -76,7 +78,9 @@ buildNgramsOthersList uCid groupIt nt = do
let
listSize
=
9
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
all'
=
List
.
reverse
$
List
.
sortOn
(
Set
.
size
.
snd
.
snd
)
$
Map
.
toList
ngs
graphTerms
=
List
.
take
listSize
all'
candiTerms
=
List
.
drop
listSize
all'
...
...
@@ -104,12 +108,16 @@ buildNgramsTermsList l n m s uCid mCid = do
-- printDebug "tail candidates" (List.take 10 $ List.reverse $ candidates)
let
listSize
=
400
::
Double
(
candidatesHead
,
candidatesTail0
)
=
List
.
splitAt
3
candidates
(
candidatesMap
,
candidatesTailFinal
)
=
List
.
splitAt
400
candidatesTail0
(
mono
,
multi
)
=
List
.
partition
(
\
t
->
(
size
.
fst
)
t
<
2
)
candidatesTail0
(
monoHead
,
monoTail
)
=
List
.
splitAt
(
round
$
0.60
*
listSize
)
mono
(
multiHead
,
multiTail
)
=
List
.
splitAt
(
round
$
0.40
*
listSize
)
multi
termList
=
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesHead
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
candidatesMap
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
candidatesTailFinal
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
MapTerm
)
(
monoHead
<>
multiHead
)
)
<>
(
map
(
toGargList
((
isStopTerm
s
)
.
fst
)
CandidateTerm
)
(
monoTail
<>
multiTail
)
)
ngs
=
List
.
concat
$
map
toNgramsElement
...
...
src/Gargantext/Core/Text/Metrics/Hetero.purs
View file @
eeeec827
...
...
@@ -52,12 +52,12 @@ dicoStruct dict_occ = do
-- | Integer ratio between the total of all occurrence counts and the number
-- of distinct keys of the occurrence map built from the cleaned input.
--
-- The map is obtained with @occurrences (cleanText string)@. When it has no
-- key at all the result is 0 instead of a "divide by zero" exception.
heterogeinity :: [Char] -> IO Integer
heterogeinity string = do
  let dict_occ  = occurrences $ cleanText string
      keys_size = toInteger $ M.size dict_occ
      total_occ = sum $ M.elems dict_occ
  -- Guard the division: an empty occurrence map has zero keys.
  return $ if keys_size == 0
             then 0
             else total_occ `div` keys_size
--computeHeterogeinity
-- :: Fractional t =>
...
...
src/Gargantext/Core/Text/Terms.hs
View file @
eeeec827
...
...
@@ -57,6 +57,7 @@ import qualified Data.List as List
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
data
TermType
lang
=
Mono
{
_tt_lang
::
!
lang
}
|
Multi
{
_tt_lang
::
!
lang
}
...
...
src/Gargantext/Core/Viz/Graph/Distances/Matrice.hs
View file @
eeeec827
...
...
@@ -466,19 +466,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
-- | Inclusion (i) = Gen(i)+Spec(i)
--
-- Element-wise sum of two vectors derived from the input matrix.
-- NOTE(review): both operands are 'pV' (documented as Gen), so the value
-- computed here is Gen(i)+Gen(i); confirm whether the second operand was
-- meant to be 'pH' (Spec) as the formula above states.
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) gen gen
  where
    gen = pV mat
-- | Genericity score = Gen(i)- Spec(i)
--
-- Element-wise sum of two vectors derived from the input matrix.
-- NOTE(review): both operands are 'pH' (documented as Spec) and they are
-- added, so the value computed here is Spec(i)+Spec(i); confirm whether
-- @zipWith (-) (pV mat) (pH mat)@ was intended, as the formula above states.
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (+) spec spec
  where
    spec = pH mat
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
--
-- Reduces @p_ij mat@ with 'sum', then maps every total @t@ to
-- @(t - 1) / (cardN - 1)@.
-- NOTE(review): subtracting 1 presumably removes the j == i term of the sum
-- (assuming that term equals 1) -- confirm against 'p_ij'.
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map rescale (sum (p_ij mat))
  where
    rescale total = (total - 1) / (cardN - 1)
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
--
-- Reduces @p_ji mat@ with 'sum', then maps every total @t@ to
-- @(t - 1) / (cardN - 1)@.
-- NOTE(review): subtracting 1 presumably removes the j == i term of the sum
-- (assuming that term equals 1) -- confirm against 'p_ji'.
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map rescale (sum (p_ji mat))
  where
    rescale total = (total - 1) / (cardN - 1)
-- | @dim m@ converted to a 'Double' and lifted into an Accelerate expression
-- with 'constant'. It is the N used by 'pV' and 'pH'.
-- @m@ is not a parameter of this binding; it is taken from the enclosing
-- definition.
-- NOTE(review): 'dim' presumably yields the side length of the square
-- matrix @m@ -- confirm against its definition.
cardN :: Exp Double
cardN = constant n
  where
    n :: Double
    n = P.fromIntegral (dim m)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
eeeec827
...
...
@@ -236,7 +236,7 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|]
------------------------------------------------------------------------
getNodesByNgramsOnlyUser
::
Node
Id
getNodesByNgramsOnlyUser
::
Corpus
Id
->
[
ListId
]
->
NgramsType
->
[
Text
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment