Commit 4a6004b4 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] hash functions (Set ordered)

parent 4fdd7798
...@@ -48,7 +48,7 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes) ...@@ -48,7 +48,7 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata) import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Utils (sha) import Gargantext.Prelude.Utils (hashFromSet, hashFromList)
-- Corpus Export -- Corpus Export
...@@ -97,6 +97,7 @@ type API = Summary "Corpus Export" ...@@ -97,6 +97,7 @@ type API = Summary "Corpus Export"
:> Get '[JSON] Corpus :> Get '[JSON] Corpus
-------------------------------------------------- --------------------------------------------------
-- | Hashes are ordered by Set
getCorpus :: CorpusId getCorpus :: CorpusId
-> Maybe ListId -> Maybe ListId
-> Maybe NgramsType -> Maybe NgramsType
...@@ -114,15 +115,14 @@ getCorpus cId lId nt' = do ...@@ -114,15 +115,14 @@ getCorpus cId lId nt' = do
repo <- getRepo repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b) r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hashFromSet b)) (d_hash a b)
) ns ngs ) ns ngs
where where
ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b d_hash a b = hashFromList [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
d_hash a b = sha $ (fromMaybe "" (_hd_uniqId $ _node_hyperdata a)) , hashFromSet b
<> (ng_hash b) ]
pure $ Corpus (Map.elems r) (hashFromList $ List.map _d_hash
pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) "" $ Map.elems r
$ List.map _d_hash $ Map.elems r
) )
getNodeNgrams :: HasNodeError err getNodeNgrams :: HasNodeError err
......
...@@ -13,6 +13,8 @@ Portability : POSIX ...@@ -13,6 +13,8 @@ Portability : POSIX
module Gargantext.Prelude.Utils module Gargantext.Prelude.Utils
where where
import Data.Set (Set)
import Data.List (foldl)
import Control.Lens (view) import Control.Lens (view)
import Control.Monad.Random.Class (MonadRandom) import Control.Monad.Random.Class (MonadRandom)
import Control.Monad.Reader (MonadReader) import Control.Monad.Reader (MonadReader)
...@@ -26,6 +28,7 @@ import System.Directory (createDirectoryIfMissing) ...@@ -26,6 +28,7 @@ import System.Directory (createDirectoryIfMissing)
import System.Random (newStdGen) import System.Random (newStdGen)
import qualified Data.ByteString.Lazy.Char8 as Char import qualified Data.ByteString.Lazy.Char8 as Char
import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest) import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified System.Random.Shuffle as SRS import qualified System.Random.Shuffle as SRS
...@@ -34,13 +37,29 @@ shuffle :: MonadRandom m => [a] -> m [a] ...@@ -34,13 +37,29 @@ shuffle :: MonadRandom m => [a] -> m [a]
shuffle ns = SRS.shuffleM ns shuffle ns = SRS.shuffleM ns
-------------------------------------------------------------------------- --------------------------------------------------------------------------
sha :: Text -> Text -- | Use this datatype to keep traceability of hashes
-- TODO use newtype
type Hash = Text
-- | API to hash text
-- using sha256 for now
hash :: Text -> Hash
hash = sha
-- | Sugar fun to sha256 Text
sha :: Text -> Hash
sha = Text.pack sha = Text.pack
. SHA.showDigest . SHA.showDigest
. SHA.sha256 . SHA.sha256
. Char.pack . Char.pack
. Text.unpack . Text.unpack
hashFromList :: [Hash] -> Hash
hashFromList = hashFromSet . Set.fromList
hashFromSet :: Set Hash -> Hash
hashFromSet = sha . foldl (<>) "" . Set.toList
-------------------------------------------------------------------------- --------------------------------------------------------------------------
data NodeToHash = NodeToHash { nodeType :: NodeType data NodeToHash = NodeToHash { nodeType :: NodeType
, nodeId :: NodeId , nodeId :: NodeId
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment