Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
159
Issues
159
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e69e0599
Commit
e69e0599
authored
Dec 16, 2024
by
Grégoire Locqueville
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed some more dead code and modules
parent
52110ac8
Pipeline
#7143
canceled with stages
Changes
14
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
5 additions
and
517 deletions
+5
-517
gargantext.cabal
gargantext.cabal
+0
-3
Metrics.hs
src/Gargantext/API/Metrics.hs
+1
-1
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+0
-11
CharByChar.hs
src/Gargantext/Core/Text/Metrics/CharByChar.hs
+0
-96
Count.hs
src/Gargantext/Core/Text/Metrics/Count.hs
+2
-81
FrequentItemSet.hs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+0
-13
TFICF.hs
src/Gargantext/Core/Text/Metrics/TFICF.hs
+0
-13
Utils.hs
src/Gargantext/Core/Text/Metrics/Utils.hs
+0
-41
Eleve.hs
src/Gargantext/Core/Text/Terms/Eleve.hs
+0
-72
En.hs
src/Gargantext/Core/Text/Terms/Mono/Token/En.hs
+0
-9
PosTagging.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+0
-25
RAKE.hs
src/Gargantext/Core/Text/Terms/Multi/RAKE.hs
+1
-6
Pairing.hs
src/Gargantext/Database/Action/Flow/Pairing.hs
+1
-1
Metrics.hs
test/Test/Ngrams/Metrics.hs
+0
-145
No files found.
gargantext.cabal
View file @
e69e0599
...
@@ -217,7 +217,6 @@ library
...
@@ -217,7 +217,6 @@ library
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Social
Gargantext.Core.Text.List.Social
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Ngrams
Gargantext.Core.Text.Ngrams
...
@@ -376,7 +375,6 @@ library
...
@@ -376,7 +375,6 @@ library
Gargantext.Core.Text.List.Social.Prelude
Gargantext.Core.Text.List.Social.Prelude
Gargantext.Core.Text.Metrics.FrequentItemSet
Gargantext.Core.Text.Metrics.FrequentItemSet
Gargantext.Core.Text.Metrics.SpeGen.IncExc
Gargantext.Core.Text.Metrics.SpeGen.IncExc
Gargantext.Core.Text.Metrics.Utils
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Terms.Mono.Token.En
Gargantext.Core.Text.Terms.Mono.Token.En
Gargantext.Core.Text.Terms.Multi.Group
Gargantext.Core.Text.Terms.Multi.Group
...
@@ -815,7 +813,6 @@ test-suite garg-test-tasty
...
@@ -815,7 +813,6 @@ test-suite garg-test-tasty
Test.Ngrams.Lang.En
Test.Ngrams.Lang.En
Test.Ngrams.Lang.Fr
Test.Ngrams.Lang.Fr
Test.Ngrams.Lang.Occurrences
Test.Ngrams.Lang.Occurrences
Test.Ngrams.Metrics
Test.Ngrams.NLP
Test.Ngrams.NLP
Test.Ngrams.Query
Test.Ngrams.Query
Test.Ngrams.Query.PaginationCorpus
Test.Ngrams.Query.PaginationCorpus
...
...
src/Gargantext/API/Metrics.hs
View file @
e69e0599
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams.Types (QueryParamR, TabType, ngramsTypeFromTabType,
...
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams.Types (QueryParamR, TabType, ngramsTypeFromTabType,
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Prelude
(
IsGargServer
)
import
Gargantext.API.Routes.Named.Metrics
qualified
as
Named
import
Gargantext.API.Routes.Named.Metrics
qualified
as
Named
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Core.Text.Metrics
(
Scored
(
..
),
{-normalizeGlobal,-}
normalizeLocal
)
import
Gargantext.Core.Text.Metrics
(
Scored
(
..
),
normalizeLocal
)
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
ListType
(
..
))
import
Gargantext.Core.Types
(
CorpusId
,
ListId
,
ListType
(
..
))
import
Gargantext.Core.Types.Query
(
Limit
)
import
Gargantext.Core.Types.Query
(
Limit
)
import
Gargantext.Core.Viz.Chart
(
chartData
,
histoData
,
treeData
)
import
Gargantext.Core.Viz.Chart
(
chartData
,
histoData
,
treeData
)
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
e69e0599
...
@@ -58,17 +58,6 @@ instance Semigroup a => Semigroup (Scored a) where
...
@@ -58,17 +58,6 @@ instance Semigroup a => Semigroup (Scored a) where
(
b
<>
b'
)
(
b
<>
b'
)
(
c
<>
c'
)
(
c
<>
c'
)
localMetrics'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
t
(
Vec
.
Vector
Double
)
localMetrics'
m
=
Map
.
fromList
$
zipWith
(
\
(
_
,
t
)
(
inc
,
spe
)
->
(
t
,
Vec
.
fromList
[
inc
,
spe
]))
(
Map
.
toList
fi
)
scores
where
(
ti
,
fi
)
=
createIndices
m
(
is
,
ss
)
=
incExcSpeGen
$
cooc2mat
Triangle
ti
m
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
-- TODO Code to be removed below
-- TODO Code to be removed below
-- TODO in the textflow we end up needing these indices , it might be
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
-- better to compute them earlier and pass them around.
...
...
src/Gargantext/Core/Text/Metrics/CharByChar.hs
deleted
100644 → 0
View file @
52110ac8
{-|
Module : Gargantext.Core.Text.Metrics.CharByChar
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
module
Gargantext.Core.Text.Metrics.CharByChar
(
levenshtein
,
levenshteinNorm
,
damerauLevenshtein
,
damerauLevenshteinNorm
,
overlap
,
jaccard
,
hamming
)
where
import
Data.Text.Metrics
qualified
as
DTM
import
Gargantext.Prelude
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
{- * Example de titre
-}
-- | This module provide metrics to compare Text
-- starting as an API rexporting main functions of the great lib
-- text-metrics of Mark Karpov
-- | Levenshtein Similarity
-- In information theory, Linguistics and computer science,
-- the Levenshtein distance is a string metric for measuring
-- the difference between two sequences.
-- See: https://en.wikipedia.org/wiki/Levenshtein_distance
--
levenshtein
::
Text
->
Text
->
Int
levenshtein
=
DTM
.
levenshtein
-- | Return normalized Levenshtein distance between two 'Text' values.
-- Result is a non-negative rational number (represented as @'Ratio'
-- 'Data.Numeric.Natural'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
levenshteinNorm
::
Text
->
Text
->
Ratio
Int
levenshteinNorm
=
DTM
.
levenshteinNorm
-- | Return Damerau-Levenshtein distance between two 'Text' values. The
-- function works like 'levenshtein', but the collection of allowed
-- operations also includes transposition of two /adjacent/ characters.
-- See also:
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance>
--
damerauLevenshtein
::
Text
->
Text
->
Int
damerauLevenshtein
=
DTM
.
damerauLevenshtein
-- damerau-Levenshtein distance normalized
--
damerauLevenshteinNorm
::
Text
->
Text
->
Ratio
Int
damerauLevenshteinNorm
=
DTM
.
damerauLevenshteinNorm
-- Treating inputs like sets
-- | Return overlap coefficient for two 'Text' values. Returned value
-- is in the range from 0 (no similarity) to 1 (exact match). Return 1
-- if both 'Text' values are empty.
--
-- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
overlap
::
Text
->
Text
->
Ratio
Int
overlap
=
DTM
.
overlap
-- | Jaccard distance
-- measures dissimilarity between sample sets
jaccard
::
Text
->
Text
->
Ratio
Int
jaccard
=
DTM
.
jaccard
-- | Hamming Similarity
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
-- substitutions required to change one string into the other
-- See: https://en.wikipedia.org/wiki/Hamming_distance
hamming
::
Text
->
Text
->
Maybe
Int
hamming
=
DTM
.
hamming
src/Gargantext/Core/Text/Metrics/Count.hs
View file @
e69e0599
...
@@ -27,12 +27,10 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
...
@@ -27,12 +27,10 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
module
Gargantext.Core.Text.Metrics.Count
module
Gargantext.Core.Text.Metrics.Count
where
where
import
Control.Arrow
(
Arrow
(
..
),
(
***
))
import
Data.List
qualified
as
List
import
Data.List
qualified
as
List
import
Data.Map.Strict
(
empty
,
singleton
,
insertWith
,
unionWith
,
unionsWith
,
mapKeys
)
import
Data.Map.Strict
(
empty
,
insertWith
)
import
Data.Map.Strict
qualified
as
DMS
import
Data.Map.Strict
qualified
as
DMS
import
Data.Text
(
pack
)
import
Gargantext.Core.Types
(
Stems
)
import
Gargantext.Core.Types
(
Terms
(
..
),
Stems
)
import
Gargantext.Prelude
hiding
(
empty
)
import
Gargantext.Prelude
hiding
(
empty
)
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -45,73 +43,10 @@ data Group = ByStem | ByOntology
...
@@ -45,73 +43,10 @@ data Group = ByStem | ByOntology
type
Grouped
=
Stems
type
Grouped
=
Stems
{-
-- >> let testData = ["blue lagoon", "blues lagoon", "red lagoon"]
-- >> map occurrences <$> Prelude.mapM (terms Mono EN)
-- [fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["lagoon"],1),(fromList ["red"],1)]]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),1)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
----
-}
type
Occs
=
Int
type
Occs
=
Int
type
Coocs
=
Int
type
Coocs
=
Int
type
Threshold
=
Int
type
Threshold
=
Int
removeApax
::
Threshold
->
Map
([
Text
],
[
Text
])
Int
->
Map
([
Text
],
[
Text
])
Int
removeApax
t
=
DMS
.
filter
(
>
t
)
cooc
::
[[
Terms
]]
->
Map
([
Text
],
[
Text
])
Int
cooc
tss
=
coocOnWithLabel
_terms_stem
(
useLabelPolicy
label_policy
)
tss
where
terms_occs
=
occurrencesOn
_terms_stem
(
List
.
concat
tss
)
label_policy
=
mkLabelPolicy
terms_occs
coocOnWithLabel
::
(
Ord
label
,
Ord
b
)
=>
(
a
->
b
)
->
(
b
->
label
)
->
[[
a
]]
->
Map
(
label
,
label
)
Coocs
coocOnWithLabel
on'
policy
tss
=
mapKeys
(
delta
policy
)
$
coocOn
on'
tss
where
delta
::
Arrow
a
=>
a
b'
c'
->
a
(
b'
,
b'
)
(
c'
,
c'
)
delta
f
=
f
***
f
mkLabelPolicy
::
Map
Grouped
(
Map
Terms
Occs
)
->
Map
Grouped
[
Text
]
mkLabelPolicy
=
DMS
.
map
f
where
f
=
_terms_label
.
fst
.
maximumWith
snd
.
DMS
.
toList
-- TODO use the Foldable instance of Map instead of building a list
useLabelPolicy
::
Map
Grouped
[
Text
]
->
Grouped
->
[
Text
]
useLabelPolicy
m
g
=
case
DMS
.
lookup
g
m
of
Just
label
->
label
Nothing
->
panic
$
"Label of Grouped not found: "
<>
(
pack
$
show
g
)
-- TODO: use a non-fatal error if this can happen in practice
{-
labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-}
coocOn
::
Ord
b
=>
(
a
->
b
)
->
[[
a
]]
->
Map
(
b
,
b
)
Int
coocOn
f
as
=
DMS
.
unionsWith
(
+
)
$
map
(
coocOn'
f
)
as
coocOn'
::
Ord
b
=>
(
a
->
b
)
->
[
a
]
->
Map
(
b
,
b
)
Int
coocOn'
fun
ts
=
DMS
.
fromListWith
(
+
)
xs
where
ts'
=
List
.
nub
$
map
fun
ts
xs
=
[
((
x
,
y
),
1
)
|
x
<-
ts'
,
y
<-
ts'
,
x
>=
y
]
------------------------------------------------------------------------
------------------------------------------------------------------------
coocOnContexts
::
(
a
->
[
Text
])
->
[[
a
]]
->
Map
([
Text
],
[
Text
])
Int
coocOnContexts
::
(
a
->
[
Text
])
->
[[
a
]]
->
Map
([
Text
],
[
Text
])
Int
...
@@ -127,23 +62,9 @@ coocOnSingleContext fun ts = xs
...
@@ -127,23 +62,9 @@ coocOnSingleContext fun ts = xs
,
x
>=
y
,
x
>=
y
]
]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Compute the grouped occurrences (occ)
occurrences
::
[
Terms
]
->
Map
Grouped
(
Map
Terms
Int
)
occurrences
=
occurrencesOn
_terms_stem
occurrencesOn
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrencesOn
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
occurrencesWith
::
(
Foldable
list
,
Ord
k
,
Num
a
,
Show
k
,
Show
a
,
Show
(
list
b
))
=>
(
b
->
k
)
->
list
b
->
Map
k
a
occurrencesWith
::
(
Foldable
list
,
Ord
k
,
Num
a
,
Show
k
,
Show
a
,
Show
(
list
b
))
=>
(
b
->
k
)
->
list
b
->
Map
k
a
occurrencesWith
f
xs
=
trace
(
show
(
xs
,
m
)
::
Text
)
m
occurrencesWith
f
xs
=
trace
(
show
(
xs
,
m
)
::
Text
)
m
where
where
m
=
foldl'
(
\
x
y
->
insertWith
(
+
)
(
f
y
)
1
x
)
empty
xs
m
=
foldl'
(
\
x
y
->
insertWith
(
+
)
(
f
y
)
1
x
)
empty
xs
-- TODO add groups and filter stops
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Occ
a
]
->
Occ
a
sumOcc
xs
=
unionsWith
(
+
)
xs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
View file @
e69e0599
...
@@ -15,7 +15,6 @@ Domain Specific Language to manage Frequent Item Set (FIS)
...
@@ -15,7 +15,6 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module
Gargantext.Core.Text.Metrics.FrequentItemSet
module
Gargantext.Core.Text.Metrics.FrequentItemSet
(
Fis
,
Size
(
..
)
(
Fis
,
Size
(
..
)
,
occ_hlcm
,
cooc_hlcm
,
occ_hlcm
,
cooc_hlcm
,
allFis
,
between
,
fisWithSize
,
fisWithSize
,
fisWith
,
fisWith
,
fisWithSizePoly
,
fisWithSizePoly
...
@@ -43,18 +42,6 @@ occ_hlcm = fisWithSize (Point 1)
...
@@ -43,18 +42,6 @@ occ_hlcm = fisWithSize (Point 1)
cooc_hlcm
::
Frequency
->
[[
Item
]]
->
[
Fis
]
cooc_hlcm
::
Frequency
->
[[
Item
]]
->
[
Fis
]
cooc_hlcm
=
fisWithSize
(
Point
2
)
cooc_hlcm
=
fisWithSize
(
Point
2
)
allFis
::
Frequency
->
[[
Item
]]
->
[
Fis
]
allFis
=
fisWith
Nothing
------------------------------------------------------------------------
between
::
(
Int
,
Int
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
between
(
x
,
y
)
=
fisWithSize
(
Segment
x
y
)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m = between (0,m)
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
-- TODO replace List with Set in fisItemSet
...
...
src/Gargantext/Core/Text/Metrics/TFICF.hs
View file @
e69e0599
...
@@ -20,14 +20,9 @@ module Gargantext.Core.Text.Metrics.TFICF
...
@@ -20,14 +20,9 @@ module Gargantext.Core.Text.Metrics.TFICF
,
Total
(
..
)
,
Total
(
..
)
,
Count
(
..
)
,
Count
(
..
)
,
tficf
,
tficf
,
sortTficf
)
)
where
where
import
Data.List
qualified
as
List
import
Data.Map.Strict
(
toList
)
import
Data.Ord
qualified
as
DO
(
Down
(
..
))
import
Gargantext.Core.Types
(
Ordering
(
..
))
import
Gargantext.Prelude
hiding
(
Down
,
Ordering
,
toList
)
import
Gargantext.Prelude
hiding
(
Down
,
Ordering
,
toList
)
path
::
Text
path
::
Text
...
@@ -64,11 +59,3 @@ tficf (TficfInfra (Count ic) (Total it) )
...
@@ -64,11 +59,3 @@ tficf (TficfInfra (Count ic) (Total it) )
<>
", sc = "
<>
show
sc
<>
", sc = "
<>
show
sc
<>
", st = "
<>
show
st
<>
", st = "
<>
show
st
tficf
_
_
=
panicTrace
$
"[ERR]"
<>
path
<>
"Undefined for these contexts"
tficf
_
_
=
panicTrace
$
"[ERR]"
<>
path
<>
"Undefined for these contexts"
sortTficf
::
Ordering
->
Map
Text
Double
->
[(
Text
,
Double
)]
sortTficf
Down
=
List
.
sortOn
(
DO
.
Down
.
snd
)
.
toList
sortTficf
Up
=
List
.
sortOn
snd
.
toList
src/Gargantext/Core/Text/Metrics/Utils.hs
deleted
100644 → 0
View file @
52110ac8
{-|
Module : Gargantext.Core.Text.Metrics.Utils
Description : Some functions to count.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module
Gargantext.Core.Text.Metrics.Utils
where
import
Data.List
qualified
as
L
import
Data.Map.Strict
qualified
as
DM
import
Gargantext.Prelude
countElem
::
(
Ord
k
)
=>
DM
.
Map
k
Int
->
k
->
DM
.
Map
k
Int
countElem
m
e
=
DM
.
insertWith
(
+
)
e
1
m
freq
::
(
Ord
k
)
=>
[
k
]
->
DM
.
Map
k
Int
freq
=
foldl
countElem
DM
.
empty
getMaxFromMap
::
Ord
a
=>
Map
a1
a
->
[
a1
]
getMaxFromMap
m
=
go
[]
Nothing
(
DM
.
toList
m
)
where
go
ks
_
[]
=
ks
go
ks
Nothing
((
k
,
v
)
:
rest
)
=
go
(
k
:
ks
)
(
Just
v
)
rest
go
ks
(
Just
u
)
((
k
,
v
)
:
rest
)
|
v
<
u
=
go
ks
(
Just
u
)
rest
|
v
>
u
=
go
[
k
]
(
Just
v
)
rest
|
otherwise
=
go
(
k
:
ks
)
(
Just
v
)
rest
average
::
[
Double
]
->
Double
average
x
=
L
.
sum
x
/
L
.
genericLength
x
average'
::
[
Int
]
->
Double
average'
x
=
(
L
.
sum
y
)
/
(
L
.
genericLength
y
)
where
y
=
L
.
map
fromIntegral
x
src/Gargantext/Core/Text/Terms/Eleve.hs
View file @
e69e0599
...
@@ -61,16 +61,8 @@ nan = 0 / 0
...
@@ -61,16 +61,8 @@ nan = 0 / 0
noNaNs
::
P
.
RealFloat
e
=>
[
e
]
->
[
e
]
noNaNs
::
P
.
RealFloat
e
=>
[
e
]
->
[
e
]
noNaNs
=
filter
(
not
.
P
.
isNaN
)
noNaNs
=
filter
(
not
.
P
.
isNaN
)
updateIfDefined
::
P
.
RealFloat
e
=>
e
->
e
->
e
updateIfDefined
e0
e
|
P
.
isNaN
e
=
e0
|
otherwise
=
e
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
::
Entropy
e
=>
e
->
e
->
Bool
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
sim
x
y
=
x
==
y
||
(
P
.
isNaN
x
&&
P
.
isNaN
y
)
subst
::
Entropy
e
=>
(
e
,
e
)
->
e
->
e
subst
(
src
,
dst
)
x
|
sim
src
x
=
dst
|
otherwise
=
x
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
-- | TODO: Show Instance only used for debugging
...
@@ -169,14 +161,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
...
@@ -169,14 +161,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
::
Entropy
e
=>
e
->
e
->
e
->
e
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
normalizeLevel
m
v
e
=
(
e
-
m
)
/
v
{- Unused
nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty
-}
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
chunkAlongEleve
::
Int
->
[
a
]
->
[[
a
]]
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
chunkAlongEleve
n
xs
=
L
.
take
n
<$>
L
.
tails
xs
...
@@ -317,38 +301,6 @@ mayCons :: [a] -> [[a]] -> [[a]]
...
@@ -317,38 +301,6 @@ mayCons :: [a] -> [[a]] -> [[a]]
mayCons
[]
xss
=
xss
mayCons
[]
xss
=
xss
mayCons
xs
xss
=
xs
:
xss
mayCons
xs
xss
=
xs
:
xss
{-
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = []
split inE t (Terminal Start:xs) = split inE t xs
split inE t (x0:xs0) = go [x0] xs0
where
go pref [] = [pref]
go pref (Terminal Stop:_) = [pref]
go _ (Terminal Start:_) = panic "split impossible"
go pref (x:xs) =
-- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
if acc
then go prefx xs
else mayCons pref $ go [x] xs
where
prefx = pref <> [x]
pt = findTrie pref t
pxt = findTrie prefx t
xt = findTrie [x] t
ept = ne pt
-- ^ entropy of the current prefix
ext = ne xt
-- ^ entropy of [x]
epxt = ne pxt
-- ^ entropy of the current prefix plus x
acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne = nodeEntropy inE
-}
split
::
Entropy
e
=>
Int
->
Lens'
i
e
->
Tries
Token
i
->
[
Token
]
->
[[
Text
]]
split
::
Entropy
e
=>
Int
->
Lens'
i
e
->
Tries
Token
i
->
[
Token
]
->
[[
Text
]]
split
_
_
_
[]
=
[]
split
_
_
_
[]
=
[]
split
_
_
_
[
t
]
=
pure
<$>
nonTerminals
[
t
]
split
_
_
_
[
t
]
=
pure
<$>
nonTerminals
[
t
]
...
@@ -357,32 +309,8 @@ split n inE t ts = nonTerminals pref `mayCons` split n inE t (drop (length pref
...
@@ -357,32 +309,8 @@ split n inE t ts = nonTerminals pref `mayCons` split n inE t (drop (length pref
pref
=
maximumWith
(
\
ks
->
nodeEntropy
inE
$
findTrie
ks
t
)
pref
=
maximumWith
(
\
ks
->
nodeEntropy
inE
$
findTrie
ks
t
)
(
L
.
tail
.
L
.
inits
.
take
n
$
ts
)
(
L
.
tail
.
L
.
inits
.
take
n
$
ts
)
{-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
split inE t0 ts =
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
mainEleve
::
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve
n
x
=
mainEleve'
n
x
x
mainEleve'
::
Int
->
[[
Text
]]
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve'
n
x
y
=
mainEleveWith
x'
n
y
where
x'
=
buildTries
n
(
fmap
toToken
x
)
-- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
-- | This function should take the longest possible chain of:
-- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
-- , mainEleve' n x x
-- , mainEleve' n y y
-- ]
mainEleve''
::
Int
->
[[
Text
]]
->
[[
Text
]]
->
[[[
Text
]]]
mainEleve''
=
undefined
mainEleveWith
::
Tries
Token
()
->
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleveWith
::
Tries
Token
()
->
Int
->
[[
Text
]]
->
[[[
Text
]]]
mainEleveWith
m
n
i
=
fmap
(
split
n
info_autonomy
t
)
(
fmap
toToken
i
)
mainEleveWith
m
n
i
=
fmap
(
split
n
info_autonomy
t
)
(
fmap
toToken
i
)
where
where
...
...
src/Gargantext/Core/Text/Terms/Mono/Token/En.hs
View file @
e69e0599
...
@@ -21,7 +21,6 @@ module Gargantext.Core.Text.Terms.Mono.Token.En
...
@@ -21,7 +21,6 @@ module Gargantext.Core.Text.Terms.Mono.Token.En
,
punctuation
,
punctuation
,
finalPunctuation
,
finalPunctuation
,
initialPunctuation
,
initialPunctuation
,
allPunctuation
,
contractions
,
contractions
,
negatives
,
negatives
)
)
...
@@ -109,14 +108,6 @@ initialPunctuation x = E $ filter (not . T.null . unwrap) $
...
@@ -109,14 +108,6 @@ initialPunctuation x = E $ filter (not . T.null . unwrap) $
|
otherwise
->
[
Right
ps
|
otherwise
->
[
Right
ps
,
Right
w
]
,
Right
w
]
-- | Split tokens on transitions between punctuation and
-- non-punctuation characters. This tokenizer is not included in
-- defaultTokenizer pipeline because dealing with word-internal
-- punctuation is quite application specific.
allPunctuation
::
Tokenizer
allPunctuation
=
E
.
map
Right
.
T
.
groupBy
(
\
a
b
->
Char
.
isPunctuation
a
==
Char
.
isPunctuation
b
)
-- | Split words ending in n't, and freeze n't
-- | Split words ending in n't, and freeze n't
negatives
::
Tokenizer
negatives
::
Tokenizer
negatives
x
|
"n't"
`
T
.
isSuffixOf
`
x
=
E
[
Right
.
T
.
reverse
.
T
.
drop
3
.
T
.
reverse
$
x
negatives
x
|
"n't"
`
T
.
isSuffixOf
`
x
=
E
[
Right
.
T
.
reverse
.
T
.
drop
3
.
T
.
reverse
$
x
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
View file @
e69e0599
...
@@ -21,9 +21,6 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
...
@@ -21,9 +21,6 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module
Gargantext.Core.Text.Terms.Multi.PosTagging
where
module
Gargantext.Core.Text.Terms.Multi.PosTagging
where
import
Data.Aeson
import
Data.Aeson
...
@@ -39,9 +36,7 @@ import Gargantext.Prelude hiding (ByteString, toLower)
...
@@ -39,9 +36,7 @@ import Gargantext.Prelude hiding (ByteString, toLower)
import
Network.HTTP.Simple
import
Network.HTTP.Simple
import
Network.URI
(
URI
(
..
))
import
Network.URI
(
URI
(
..
))
-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
tokens2tokensTags
::
[
Token
]
->
[
TokenTag
]
tokens2tokensTags
::
[
Token
]
->
[
TokenTag
]
tokens2tokensTags
ts
=
filter'
$
map
tokenTag
ts
tokens2tokensTags
ts
=
filter'
$
map
tokenTag
ts
...
@@ -132,23 +127,3 @@ corenlp :: URI -> Lang -> Text -> IO PosSentences
...
@@ -132,23 +127,3 @@ corenlp :: URI -> Lang -> Text -> IO PosSentences
corenlp
uri
lang
txt
=
do
corenlp
uri
lang
txt
=
do
response
<-
corenlp'
uri
lang
txt
response
<-
corenlp'
uri
lang
txt
pure
(
getResponseBody
response
)
pure
(
getResponseBody
response
)
-- | parseWith
-- Part Of Speech example
-- parseWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith
::
URI
->
(
Token
->
t
)
->
Lang
->
Text
->
IO
[[(
Text
,
t
)]]
tokenWith
uri
f
lang
s
=
map
(
map
(
\
t
->
(
_tokenWord
t
,
f
t
)))
<$>
map
_sentenceTokens
<$>
_sentences
<$>
corenlp
uri
lang
s
----------------------------------------------------------------------------------
-- Here connect to the JohnSnow Server as it has been done above with the corenlp'
-- We need the PosTagging according to the language and the lems
serverNLP
::
Lang
->
Text
->
IO
PosSentences
serverNLP
=
undefined
src/Gargantext/Core/Text/Terms/Multi/RAKE.hs
View file @
e69e0599
...
@@ -28,18 +28,13 @@ list quality in time.
...
@@ -28,18 +28,13 @@ list quality in time.
-}
-}
module
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
,
select
,
hardStopList
)
module
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
,
hardStopList
)
where
where
import
Gargantext.Core.Text.Samples.EN
(
stopList
)
import
Gargantext.Core.Text.Samples.EN
(
stopList
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
NLP.RAKE.Text
import
NLP.RAKE.Text
select
::
Double
->
[
a
]
->
[
a
]
select
part
ns
=
take
n
ns
where
n
=
round
$
part
*
(
fromIntegral
$
length
ns
)
multiterms_rake
::
Text
->
[
WordScore
]
multiterms_rake
::
Text
->
[
WordScore
]
multiterms_rake
=
candidates
hardStopList
multiterms_rake
=
candidates
hardStopList
...
...
src/Gargantext/Database/Action/Flow/Pairing.hs
View file @
e69e0599
...
@@ -24,11 +24,11 @@ import Data.HashMap.Strict qualified as HashMap
...
@@ -24,11 +24,11 @@ import Data.HashMap.Strict qualified as HashMap
import
Data.List
qualified
as
List
import
Data.List
qualified
as
List
import
Data.Set
qualified
as
Set
import
Data.Set
qualified
as
Set
import
Data.Text
qualified
as
Text
import
Data.Text
qualified
as
Text
import
Data.Text.Metrics
(
levenshtein
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
getRepo
,
groupNodesByNgrams
,
mapTermListRoot
)
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
getRepo
,
groupNodesByNgrams
,
mapTermListRoot
)
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core
(
HasDBid
(
toDBid
)
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.NodeStory.Types
(
HasNodeStory
)
import
Gargantext.Core.Text.Metrics.CharByChar
(
levenshtein
)
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Text.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Core.Types
(
TableResult
(
..
))
import
Gargantext.Core.Types
(
TableResult
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
,
MapTerm
)
)
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
,
MapTerm
)
)
...
...
test/Test/Ngrams/Metrics.hs
deleted
100644 → 0
View file @
52110ac8
{-|
Module : Ngrams.Metrics
Description :
Copyright : Ngrams.Metrics (c)
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}
--module Ngrams.Metrics (main) where
module
Test.Ngrams.Metrics
where
{-
import Data.Text (Text)
import qualified Data.Text as T
import Data.Ratio
import Test.Hspec
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
instance Arbitrary Text where
arbitrary = T.pack <$> arbitrary
main :: IO ()
main = hspec spec
spec :: Spec
spec = do
describe "levenshtein" $ do
testSwap levenshtein
context "with concrete examples" $ do
testPair levenshtein "kitten" "sitting" 3
testPair levenshtein "cake" "drake" 2
testPair levenshtein "saturday" "sunday" 3
testPair levenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair levenshtein "a😀c" "abc" 1
#endif
testPair levenshtein "lucky" "lucky" 0
testPair levenshtein "" "" 0
describe "levenshteinNorm" $ do
testSwap levenshteinNorm
testPair levenshteinNorm "kitten" "sitting" (4 % 7)
testPair levenshteinNorm "cake" "drake" (3 % 5)
testPair levenshteinNorm "saturday" "sunday" (5 % 8)
testPair levenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair levenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair levenshteinNorm "lucky" "lucky" (1 % 1)
testPair levenshteinNorm "" "" (1 % 1)
describe "damerauLevenshtein" $ do
testSwap damerauLevenshtein
testPair damerauLevenshtein "veryvery long" "very long" 4
testPair damerauLevenshtein "thing" "think" 1
testPair damerauLevenshtein "nose" "ones" 2
testPair damerauLevenshtein "thing" "sign" 3
testPair damerauLevenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshtein "a😀c" "abc" 1
#endif
testPair damerauLevenshtein "lucky" "lucky" 0
testPair damerauLevenshtein "" "" 0
describe "damerauLevenshteinNorm" $ do
testSwap damerauLevenshteinNorm
testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
testPair damerauLevenshteinNorm "" "" (1 % 1)
describe "hamming" $ do
testSwap hamming
testPair hamming "karolin" "kathrin" (Just 3)
testPair hamming "karolin" "kerstin" (Just 3)
testPair hamming "1011101" "1001001" (Just 2)
testPair hamming "2173896" "2233796" (Just 3)
testPair hamming "toned" "roses" (Just 3)
testPair hamming "red" "wax" (Just 3)
#if __GLASGOW_HASKELL__ >= 710
testPair hamming "a😀c" "abc" (Just 1)
#endif
testPair hamming "lucky" "lucky" (Just 0)
testPair hamming "" "" (Just 0)
testPair hamming "small" "big" Nothing
describe "overlap" $ do
testSwap overlap
testPair overlap "fly" "butterfly" (1 % 1)
testPair overlap "night" "nacht" (3 % 5)
testPair overlap "context" "contact" (5 % 7)
testPair overlap "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair overlap "lucky" "lucky" (1 % 1)
describe "jaccard" $ do
testSwap jaccard
testPair jaccard "xxx" "xyx" (1 % 2)
testPair jaccard "night" "nacht" (3 % 7)
testPair jaccard "context" "contact" (5 % 9)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair jaccard "lucky" "lucky" (1 % 1)
-- | Test that given function returns the same results when order of
-- arguments is swapped.
testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
testSwap f = context "if we swap the arguments" $
it "produces the same result" $
property $ \a b -> f a b === f b a
-- | Create spec for given metric function applying it to two 'Text' values
-- and comparing the result with expected one.
testPair :: (Eq a, Show a)
=> (Text -> Text -> a) -- ^ Function to test
-> Text -- ^ First input
-> Text -- ^ Second input
-> a -- ^ Expected result
-> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment