Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
cd4899c0
Commit
cd4899c0
authored
Dec 03, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Order1 similarity optimized (parallel computation)
parent
f7b1528f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
26 additions
and
127 deletions
+26
-127
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+2
-1
Conditional.hs
src/Gargantext/Core/Methods/Distances/Conditional.hs
+24
-126
No files found.
src/Gargantext/API/Ngrams/Types.hs
View file @
cd4899c0
...
...
@@ -11,6 +11,7 @@ module Gargantext.API.Ngrams.Types where
import
Codec.Serialise
(
Serialise
())
import
Control.Category
((
>>>
))
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
,
makePrisms
,
Iso
'
,
iso
,
from
,
(
.~
),
(
?=
),
(
#
),
to
,
folded
,
{-withIndex, ifolded,-}
view
,
use
,
(
^.
),
(
^?
),
(
%~
),
(
.~
),
(
%=
),
at
,
_Just
,
Each
(
..
),
itraverse_
,
both
,
forOf_
,
(
?~
))
import
Control.Monad.State
import
Data.Aeson
hiding
((
.=
))
...
...
@@ -121,7 +122,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype
NgramsTerm
=
NgramsTerm
{
unNgramsTerm
::
Text
}
deriving
(
Ord
,
Eq
,
Show
,
Generic
,
ToJSONKey
,
ToJSON
,
FromJSON
,
Semigroup
,
Arbitrary
,
Serialise
,
ToSchema
,
Hashable
)
deriving
(
Ord
,
Eq
,
Show
,
Generic
,
ToJSONKey
,
ToJSON
,
FromJSON
,
Semigroup
,
Arbitrary
,
Serialise
,
ToSchema
,
Hashable
,
NFData
)
instance
IsHashable
NgramsTerm
where
hash
(
NgramsTerm
t
)
=
hash
t
...
...
src/Gargantext/Core/Methods/Distances/Conditional.hs
View file @
cd4899c0
...
...
@@ -15,37 +15,34 @@ Motivation and definition of the @Conditional@ distance.
module
Gargantext.Core.Methods.Distances.Conditional
where
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.Hashable
(
Hashable
)
import
Data.List
(
unzip
)
import
Data.Maybe
(
catMaybes
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
Map
import
qualified
Data.Set
as
Set
import
Data.Hashable
(
Hashable
)
import
Control.DeepSeq
(
NFData
)
type
HashMap
=
Map
.
HashMap
------------------------------------------------------------------------
-- First version as first implementation
-- - unoptimized but qualitatively verified
getMax
::
(
a
,
a
)
->
Maybe
Double
->
Maybe
Double
->
Maybe
((
a
,
a
),
Double
)
getMax
(
i
,
j
)
(
Just
d
)
Nothing
=
Just
((
i
,
j
),
d
)
getMax
(
i
,
j
)
Nothing
(
Just
d
)
=
Just
((
j
,
i
),
d
)
getMax
ij
(
Just
di
)
(
Just
dj
)
=
if
di
>=
dj
then
getMax
ij
(
Just
di
)
Nothing
else
getMax
ij
Nothing
(
Just
dj
)
getMax
_
_
_
=
Nothing
conditional
::
(
Ord
a
,
Hashable
a
)
=>
HashMap
(
a
,
a
)
Int
->
HashMap
(
a
,
a
)
Double
conditional
m'
=
Map
.
fromList
$
catMaybes
results
-- - qualitatively verified
-- - parallized as main optimization
conditional
::
(
Ord
a
,
Hashable
a
,
NFData
a
)
=>
HashMap
(
a
,
a
)
Int
->
HashMap
(
a
,
a
)
Double
conditional
m'
=
Map
.
fromList
$
((
catMaybes
results'
)
`
using
`
parList
rdeepseq
)
where
results
=
[
let
results
'
=
[
let
ij
=
(
/
)
<$>
Map
.
lookup
(
i
,
j
)
m
<*>
Map
.
lookup
(
i
,
i
)
m
ji
=
(
/
)
<$>
Map
.
lookup
(
j
,
i
)
m
<*>
Map
.
lookup
(
j
,
j
)
m
in
getMax
(
i
,
j
)
ij
ji
|
i
<-
keys
,
j
<-
keys
,
i
<
j
]
|
i
<-
keys
,
j
<-
keys
,
i
<
j
]
-- Converting from Int to Double
m
=
Map
.
map
fromIntegral
m'
...
...
@@ -54,113 +51,14 @@ conditional m' = Map.fromList $ catMaybes results
(
x
,
y
)
=
unzip
$
Map
.
keys
m
getMax
::
(
a
,
a
)
->
Maybe
Double
->
Maybe
Double
->
Maybe
((
a
,
a
),
Double
)
getMax
(
i
,
j
)
(
Just
d
)
Nothing
=
Just
((
i
,
j
),
d
)
getMax
(
i
,
j
)
Nothing
(
Just
d
)
=
Just
((
j
,
i
),
d
)
getMax
ij
(
Just
di
)
(
Just
dj
)
=
if
di
>=
dj
then
getMax
ij
(
Just
di
)
Nothing
else
getMax
ij
Nothing
(
Just
dj
)
getMax
_
_
_
=
Nothing
{-
import Data.List (sortOn)
import Data.Map (Map)
import Data.Matrix hiding (identity)
import Gargantext.Core.Viz.Graph.Utils
import Gargantext.Prelude
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Vector as V
------------------------------------------------------------------------
-- | Optimisation issue
toBeOptimized :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
toBeOptimized m = proba Col m
------------------------------------------------------------------------
-- | Main Functions
-- Compute the probability from axis
-- x' = x / (sum Col x)
proba :: (Num a, Fractional a) => Axis -> Matrix a -> Matrix a
proba a m = mapOn a (\c x -> x / V.sum (axis a c m)) m
mapOn :: Axis -> (AxisId -> a -> a) -> Matrix a -> Matrix a
mapOn a f m = V.foldl' f' m (V.enumFromTo 1 (nOf a m))
where
f' m' c = mapOnly a f c m'
mapOnly :: Axis -> (AxisId -> a -> a) -> AxisId -> Matrix a -> Matrix a
mapOnly Col = mapCol
mapOnly Row = mapRow
mapAll :: (a -> a) -> Matrix a -> Matrix a
mapAll f m = mapOn Col (\_ -> f) m
---------------------------------------------------------------
-- | Compute a distance from axis
-- xs = (sum Col x') - x'
distFromSum :: (Num a, Fractional a)
=> Axis -> Matrix a -> Matrix a
distFromSum a m = mapOn a (\c x -> V.sum (axis a c m) - x) m
---------------------------------------------------------------
---------------------------------------------------------------
-- | To compute included/excluded or specific/generic scores
opWith :: (Fractional a1, Num a1)
=> (Matrix a2 -> t -> Matrix a1) -> Matrix a2 -> t -> Matrix a1
opWith op xs ys = mapAll (\x -> x / (2*n -1)) (xs `op` ys)
where
n = fromIntegral $ nOf Col xs
---------------------------------------------------------------
-------------------------------------------------------
conditional :: (Num a, Fractional a, Ord a) => Matrix a -> Matrix a
conditional m = filterMat (threshold m') m'
where
------------------------------------------------------------------------
-- | Main Operations
-- x' = x / (sum Col x)
x' = proba Col m
------------------------------------------------------------------------
-- xs = (sum Col x') - x'
xs = distFromSum Col x'
-- ys = (sum Row x') - x'
ys = distFromSum Row x'
------------------------------------------------------------------------
-- | Top included or excluded
ie = opWith (+) xs ys
-- ie = ( xs + ys) / (2 * (x.shape[0] - 1))
-- | Top specific or generic
sg = opWith (-) xs ys
-- sg = ( xs - ys) / (2 * (x.shape[0] - 1))
nodes_kept :: [Int]
nodes_kept = take k' $ S.toList
$ foldl' (\s (n1,n2) -> insert [n1,n2] s) S.empty
$ map fst
$ nodes_included k <> nodes_specific k
nodes_included n = take n $ sortOn snd $ toListsWithIndex ie
nodes_specific n = take n $ sortOn snd $ toListsWithIndex sg
insert as s = foldl' (\s' a -> S.insert a s') s as
k' = 2*k
k = 10
dico_nodes :: Map Int Int
dico_nodes = M.fromList $ zip ([1..] :: [Int]) nodes_kept
--dico_nodes_rev = M.fromList $ zip nodes_kept [1..]
m' = matrix (length nodes_kept)
(length nodes_kept)
(\(i,j) -> getElem ((M.!) dico_nodes i) ((M.!) dico_nodes j) x')
threshold m'' = V.minimum
$ V.map (\cId -> V.maximum $ getCol cId m'')
(V.enumFromTo 1 (nOf Col m'') )
filterMat t m'' = mapAll (\x -> filter' t x) m''
where
filter' t' x = case (x >= t') of
True -> x
False -> 0
------------------------------------------------------------------------
-}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment