Commit 4a6004b4 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] hash functions (Set ordered)

parent 4fdd7798
......@@ -48,7 +48,7 @@ import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
import Gargantext.Database.Schema.Node (_node_id, _node_hyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Prelude.Utils (sha)
import Gargantext.Prelude.Utils (hashFromSet, hashFromList)
-- Corpus Export
......@@ -97,6 +97,7 @@ type API = Summary "Corpus Export"
:> Get '[JSON] Corpus
--------------------------------------------------
-- | Hashes are ordered by Set
getCorpus :: CorpusId
-> Maybe ListId
-> Maybe NgramsType
......@@ -114,15 +115,14 @@ getCorpus cId lId nt' = do
repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo
let -- uniqId is hash computed already for each document imported in database
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (hashFromSet b)) (d_hash a b)
) ns ngs
where
ng_hash b = sha $ List.foldl (\x y -> x<>y) "" $ List.sort $ Set.toList b
d_hash a b = sha $ (fromMaybe "" (_hd_uniqId $ _node_hyperdata a))
<> (ng_hash b)
pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
$ List.map _d_hash $ Map.elems r
d_hash a b = hashFromList [ fromMaybe "" (_hd_uniqId $ _node_hyperdata a)
, hashFromSet b
]
pure $ Corpus (Map.elems r) (hashFromList $ List.map _d_hash
$ Map.elems r
)
getNodeNgrams :: HasNodeError err
......
......@@ -13,6 +13,8 @@ Portability : POSIX
module Gargantext.Prelude.Utils
where
import Data.Set (Set)
import Data.List (foldl)
import Control.Lens (view)
import Control.Monad.Random.Class (MonadRandom)
import Control.Monad.Reader (MonadReader)
......@@ -26,6 +28,7 @@ import System.Directory (createDirectoryIfMissing)
import System.Random (newStdGen)
import qualified Data.ByteString.Lazy.Char8 as Char
import qualified Data.Digest.Pure.SHA as SHA (sha256, showDigest)
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified System.Random.Shuffle as SRS
......@@ -34,13 +37,29 @@ shuffle :: MonadRandom m => [a] -> m [a]
shuffle ns = SRS.shuffleM ns
--------------------------------------------------------------------------
sha :: Text -> Text
-- | Use this datatype to keep traceability of hashes
-- TODO use newtype
type Hash = Text
-- | API to hash text
-- using sha256 for now
hash :: Text -> Hash
hash = sha
-- | Sugar fun to sha256 Text
sha :: Text -> Hash
sha = Text.pack
. SHA.showDigest
. SHA.sha256
. Char.pack
. Text.unpack
hashFromList :: [Hash] -> Hash
hashFromList = hashFromSet . Set.fromList
hashFromSet :: Set Hash -> Hash
hashFromSet = sha . foldl (<>) "" . Set.toList
--------------------------------------------------------------------------
data NodeToHash = NodeToHash { nodeType :: NodeType
, nodeId :: NodeId
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment