[sqlite] implement ngrams reading for CorpusSQLiteData

parent 625d6bbe
......@@ -19,12 +19,12 @@ import Data.ByteString.Lazy qualified as BSL
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Time.Clock (getCurrentTime)
import Data.Time.Clock (getCurrentTime, UTCTime)
import Data.Time.Format.ISO8601 (iso8601ParseM, iso8601Show)
import Data.Version (parseVersion, showVersion)
import Database.SQLite.Simple qualified as S
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Ngrams.Types ( NgramsTerm(..) )
import Gargantext.API.Node.Corpus.Export.Types (CorpusSQLite(..), CorpusSQLiteData(..))
import Gargantext.Core.NodeStory.Types ( HasNodeStoryEnv, NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
......@@ -33,8 +33,9 @@ import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.List (HyperdataList)
import Gargantext.Database.Admin.Types.Node (unNodeId, ContextId(_ContextId), NodeId(UnsafeMkNodeId))
import Gargantext.Database.Admin.Types.Node (unNodeId, ContextId(..), NodeId(UnsafeMkNodeId))
import Gargantext.Database.Prelude (DBCmd, IsDBCmd)
import Gargantext.Database.Schema.Context (context_id, context_name, context_date, context_hyperdata)
import Gargantext.Database.Schema.Node (node_hash_id, node_hyperdata, node_name, node_parent_id)
......@@ -178,7 +179,7 @@ readCorpusSQLite :: ( CES.MonadMask m
=> CorpusSQLite
-> m (Either Text CorpusSQLiteData)
readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname, fpath) -> liftBase $ do
(info, corpusData, listData, documents) <- S.withConnection fpath $ \conn -> do
(info, corpusData, listData, documents, ngrams) <- S.withConnection fpath $ \conn -> do
[S.Only version] <- S.query_ conn "SELECT value FROM info WHERE key = 'gargVersion'"
[S.Only cId] <- S.query_ conn "SELECT value FROM info WHERE key = 'corpusId'"
[S.Only lId] <- S.query_ conn "SELECT value FROM info WHERE key = 'listId'"
......@@ -187,28 +188,27 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
[corpusData] <- S.query_ conn "SELECT name, hash, parent_id, hyperdata FROM corpus"
[listData] <- S.query_ conn "SELECT name, parent_id, hyperdata FROM lists"
-- [ngrams] <- S.query_ conn "SELECT context_id, terms, type_ FROM ngrams"
documents <- S.query_ conn "SELECT context_id, name, date, hyperdata FROM documents"
ngrams <- S.query_ conn "SELECT context_id, terms, type_ FROM ngrams"
pure (info, corpusData, listData, documents)
pure (info, corpusData, listData, documents, ngrams)
let (version, cId, lId, created) = info
let (_csd_corpus_name, _csd_corpus_hash, corpusParent, corpusHyperdata) = corpusData
let (_csd_list_name, listParent, listHyperdata) = listData
let parseCtx (ctxId, name, date, hd) =
case ( iso8601ParseM date, Aeson.decode hd ) of
( Just d, Just h ) -> Right ( UnsafeMkNodeId ctxId, name, d, h )
_ -> Left ("Context " <> show ctxId <> " parse error" :: Text)
let (context_errors, _csd_contexts) = partitionEithers (parseCtx <$> documents)
let (ngrams_errors, ngrams_) = partitionEithers (parseNgrams <$> ngrams)
-- TODO pure $ do in Either monad
case ( readP_to_S parseVersion version
, iso8601ParseM created
, Aeson.decode corpusHyperdata
, Aeson.decode listHyperdata
, context_errors ) of
([(_csd_version, _)], Just _csd_created, Just _csd_corpus_hyperdata, Just _csd_list_hyperdata, []) -> do
, context_errors
, ngrams_errors ) of
([(_csd_version, _)], Just _csd_created, Just _csd_corpus_hyperdata, Just _csd_list_hyperdata, [], []) -> do
let _csd_cId = UnsafeMkNodeId cId
let _csd_lId = UnsafeMkNodeId lId
......@@ -216,14 +216,35 @@ readCorpusSQLite (CorpusSQLite { _cs_bs }) = withTempSQLiteDir $ \(_fp, _fname,
let _csd_list_parent = UnsafeMkNodeId <$> listParent
-- TODO
let _csd_map_context_ngrams = Map.empty
let _csd_stop_context_ngrams = Map.empty
let _csd_candidate_context_ngrams = Map.empty
let _csd_map_context_ngrams = filterNgrams MapTerm ngrams_
let _csd_stop_context_ngrams = filterNgrams StopTerm ngrams_
let _csd_candidate_context_ngrams = filterNgrams CandidateTerm ngrams_
pure $ Right $ CorpusSQLiteData { .. }
_ -> pure $ Left "Parse error"
where
parseCtx :: (Int, Text, Prelude.String, BSL.ByteString)
-> Either Text (NodeId, Text, UTCTime, HyperdataDocument)
parseCtx (ctxId, name, date, hd) =
case ( iso8601ParseM date, Aeson.decode hd ) of
( Just d, Just h ) -> Right ( UnsafeMkNodeId ctxId, name, d, h )
_ -> Left ("Context " <> show ctxId <> " parse error" :: Text)
parseNgrams :: (Int, Text, Text) -> Either Text (ListType, (ContextId, NgramsTerm))
parseNgrams (ctxId, term, type_) =
case type_ of
"map" -> Right ( MapTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
"stop" -> Right ( StopTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
"candidate" -> Right ( CandidateTerm, ( UnsafeMkContextId ctxId, NgramsTerm term ) )
_ -> Left ("Unknown term " <> term)
filterNgrams :: ListType -> [(ListType, (ContextId, NgramsTerm))] -> Map ContextId (Set NgramsTerm)
filterNgrams lt ngrams_ = Map.fromListWith (<>) $
map (\(_, (ctxId, term)) -> (ctxId, Set.singleton term))
(filter (\(lt_, _) -> lt == lt_) ngrams_)
withTempSQLiteDir :: (CES.MonadMask m, MonadBase IO m)
=> ((FilePath, Prelude.String, FilePath) -> m a)
-> m a
......
......@@ -65,6 +65,9 @@ tests = sequential $ around withTestDBAndPort $ beforeWith dbEnvSetup $ do
_csd_cId `shouldBe` corpusId
_csd_lId `shouldBe` aliceListId
length _csd_contexts `shouldBe` 2
length _csd_map_context_ngrams `shouldBe` 0
length _csd_stop_context_ngrams `shouldBe` 0
length _csd_candidate_context_ngrams `shouldBe` 0
describe "GET /api/v1.0/corpus/cId/sqlite" $ do
it "returns correct SQLite db" $ \ctx -> do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment