Commit bf19abb3 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT] Export with Hash (Tree inspired from Merkle Tree).

parent 3f99bf81
...@@ -30,6 +30,7 @@ module Gargantext.API.Export ...@@ -30,6 +30,7 @@ module Gargantext.API.Export
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set) import Data.Set (Set)
import Data.Maybe (fromMaybe)
import Data.Swagger import Data.Swagger
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
...@@ -47,36 +48,51 @@ import Gargantext.Database.Schema.NodeNode (selectDocNodes) ...@@ -47,36 +48,51 @@ import Gargantext.Database.Schema.NodeNode (selectDocNodes)
import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId) import Gargantext.Database.Types.Node (Node, HyperdataDocument(..), NodeId, ListId, CorpusId)
import Gargantext.Database.Utils (Cmd) import Gargantext.Database.Utils (Cmd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Utils (sha)
import Servant import Servant
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List
-- Corpus Export -- Corpus Export
data Corpus = data Corpus =
Corpus { _c_corpus :: [Document] Corpus { _c_corpus :: [Document]
-- , _c_listVersion :: Int , _c_hash :: Hash
, _c_hash :: Text
} deriving (Generic) } deriving (Generic)
-- | Document Export -- | Document Export
data Document = data Document =
Document { _d_document :: Node HyperdataDocument Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: [Text] , _d_ngrams :: Ngrams
-- , _d_hash :: Text , _d_hash :: Hash
} deriving (Generic) } deriving (Generic)
data Ngrams =
Ngrams { _ng_ngrams :: [Text]
, _ng_hash :: Hash
} deriving (Generic)
type Hash = Text
-------
instance ToSchema Corpus where instance ToSchema Corpus where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_c_")
instance ToSchema Document where instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
instance ToSchema Ngrams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ng_")
-------
instance ToParamSchema Corpus where instance ToParamSchema Corpus where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO) toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Ngrams where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-------------------------------------------------- --------------------------------------------------
type API = Summary "Corpus Export" type API = Summary "Corpus Export"
:> "export" :> "export"
...@@ -101,12 +117,17 @@ getCorpus cId lId nt' = do ...@@ -101,12 +117,17 @@ getCorpus cId lId nt' = do
<$> selectDocNodes cId <$> selectDocNodes cId
repo <- getRepo repo <- getRepo
ngs <- getNodeNgrams cId lId nt repo ngs <- getNodeNgrams cId lId nt repo
let r = Map.intersectionWith (\a b -> Document a (Set.toList b)) ns ngs let -- uniqId is hash computed already for each document imported in database
pure $ Corpus (Map.elems r) "HASH_TODO" r = Map.intersectionWith (\a b -> Document a (Ngrams (Set.toList b) (ng_hash b)) (d_hash a b)
) ns ngs
-- getCorpusNgrams :: CorpusId -> ListId -> where
-- Exports List ng_hash b = sha $ Set.foldl (\x y -> x<>y) "" b
-- Version number of the list d_hash a b = sha $ (fromMaybe "" (_hyperdataDocument_uniqId $ _node_hyperdata a))
<> (ng_hash b)
pure $ Corpus (Map.elems r) (sha $ List.foldl (\a b -> a<>b) ""
$ List.map _d_hash $ Map.elems r
)
getNodeNgrams :: HasNodeError err getNodeNgrams :: HasNodeError err
=> CorpusId => CorpusId
...@@ -127,4 +148,11 @@ getNodeNgrams cId lId' nt repo = do ...@@ -127,4 +148,11 @@ getNodeNgrams cId lId' nt repo = do
$(deriveJSON (unPrefix "_c_") ''Corpus) $(deriveJSON (unPrefix "_c_") ''Corpus)
$(deriveJSON (unPrefix "_d_") ''Document) $(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
-- TODO
-- Exports List
-- Version number of the list
...@@ -36,12 +36,11 @@ import Crypto.Argon2 as Crypto ...@@ -36,12 +36,11 @@ import Crypto.Argon2 as Crypto
import Data.Either import Data.Either
import Data.ByteString.Base64.URL as URL import Data.ByteString.Base64.URL as URL
--------------------------------------------------------------------------
shuffle :: MonadRandom m => [a] -> m [a] shuffle :: MonadRandom m => [a] -> m [a]
shuffle ns = SRS.shuffleM ns shuffle ns = SRS.shuffleM ns
type FolderPath = FilePath --------------------------------------------------------------------------
type FileName = FilePath
sha :: Text -> Text sha :: Text -> Text
sha = Text.pack sha = Text.pack
. SHA.showDigest . SHA.showDigest
...@@ -49,6 +48,7 @@ sha = Text.pack ...@@ -49,6 +48,7 @@ sha = Text.pack
. Char.pack . Char.pack
. Text.unpack . Text.unpack
--------------------------------------------------------------------------
data NodeToHash = NodeToHash { nodeType :: NodeType data NodeToHash = NodeToHash { nodeType :: NodeType
, nodeId :: NodeId , nodeId :: NodeId
} }
...@@ -58,6 +58,9 @@ secret_key = "WRV5ymit8s~ge6%08dLR7Q!gBcpb1MY%7e67db2206" ...@@ -58,6 +58,9 @@ secret_key = "WRV5ymit8s~ge6%08dLR7Q!gBcpb1MY%7e67db2206"
type SecretKey = ByteString type SecretKey = ByteString
type FolderPath = FilePath
type FileName = FilePath
hashNode :: SecretKey -> NodeToHash -> ByteString hashNode :: SecretKey -> NodeToHash -> ByteString
hashNode sk (NodeToHash nt ni) = case hashResult of hashNode sk (NodeToHash nt ni) = case hashResult of
Left e -> panic (cs $ show e) Left e -> panic (cs $ show e)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment