Commit 986a253c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DBFLOW] Ngrams, NgramsIndexed, NgramsT a.

parent f6a473f2
...@@ -18,16 +18,20 @@ authors ...@@ -18,16 +18,20 @@ authors
-} -}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Flow module Gargantext.Database.Flow
where where
import System.FilePath (FilePath) import System.FilePath (FilePath)
import Control.Lens (makeLenses)
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as DM import qualified Data.Map as DM
import GHC.Generics (Generic)
import Gargantext.Core.Types (NodePoly(..)) import Gargantext.Core.Types (NodePoly(..))
import Gargantext.Prelude import Gargantext.Prelude
...@@ -38,6 +42,7 @@ import Gargantext.Database.User (getUser, UserLight(..), Username) ...@@ -38,6 +42,7 @@ import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS)) import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
import Gargantext.Database.Ngram (insertNgrams, NgramIds(..))
type UserId = Int type UserId = Int
type RootId = Int type RootId = Int
...@@ -109,39 +114,66 @@ data DocumentWithId = DocumentWithId { documentId :: NodeId ...@@ -109,39 +114,66 @@ data DocumentWithId = DocumentWithId { documentId :: NodeId
type NodeId = Int type NodeId = Int
mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId]
-> [DocumentWithId]
mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $ DM.toList rs mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $ DM.toList rs
where where
lookup' h xs = maybe (panic $ "Error with " <> h) identity (DM.lookup h xs) lookup' h xs = maybe (panic $ "Error with " <> h) identity (DM.lookup h xs)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data NgramsType = Sources | Authors | Terms
data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId type NgramId = Int
, document_ngrams :: Map Ngram Int
}
data NgramsType = Sources | Authors | Terms data Ngrams = Ngrams { _ngramsTerms :: Text
, _ngramsSize :: Int
} deriving (Generic)
instance Eq Ngrams where
(==) = (==)
instance Ord Ngrams where
compare = compare
makeLenses ''Ngrams
data NgramsIndexed = NgramsIndexed { _ngrams :: Ngrams
, _ngramsId :: NgramId
} deriving (Generic)
instance Eq NgramsIndexed where
(==) = (==)
instance Ord NgramsIndexed where
compare = compare
makeLenses ''NgramsIndexed
data NgramsT a = NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic)
instance Eq (NgramsT a) where (==) = (==)
instance Ord (NgramsT a) where compare = compare
makeLenses ''NgramsT
-- | Typed Ngrams data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId
data Ngrams = Ngrams { ngramsType :: NgramsType , document_ngrams :: Map (NgramsT Ngrams)Int
, ngramsText :: Text
, ngramsSize :: Int
} }
type Ngram = Text
type NgramId = Int
documentIdWithNgrams :: (HyperdataDocument -> Map Ngram Int) -> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int) -> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d)) documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
-- | TODO check optimization -- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map Ngram [(NodeId,Int)] mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) [(NodeId,Int)]
mapNodeIdNgrams ds = DM.fromListWith (<>) xs mapNodeIdNgrams ds = DM.fromListWith (<>) xs
where where
xs = [(ngId, [(nId, i)]) | (nId, n2i') <- n2i ds, (ngId, i) <- DM.toList n2i'] xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d)) n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
--indexNgram :: [Ngram] -> indexNgrams :: Map (NgramsT Ngrams ) [(NodeId, Int)]
-> IO (Map (NgramsT NgramsIndexed) [(NodeId, Int)])
indexNgrams ng2nId = undefined
--let keys = DM.keys ng2nId
...@@ -150,5 +182,7 @@ mapNodeIdNgrams ds = DM.fromListWith (<>) xs ...@@ -150,5 +182,7 @@ mapNodeIdNgrams ds = DM.fromListWith (<>) xs
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int) --indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined --indexNgram = undefined
-- grouping here -- group Ngrams
-- insert Groups
...@@ -37,6 +37,7 @@ import Gargantext.Database.Node (mkCmd, Cmd(..)) ...@@ -37,6 +37,7 @@ import Gargantext.Database.Node (mkCmd, Cmd(..))
import Prelude import Prelude
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.FromField ( FromField, fromField)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
...@@ -73,7 +74,7 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) ...@@ -73,7 +74,7 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
type Ngram = Text type Ngram = Text
type NgramId = Int type NgramId = Int
type SizeN = Int type Size = Int
data NgramIds = NgramIds { ngramId :: Int data NgramIds = NgramIds { ngramId :: Int
, ngramTerms :: Text , ngramTerms :: Text
...@@ -83,19 +84,17 @@ instance DPS.FromRow NgramIds where ...@@ -83,19 +84,17 @@ instance DPS.FromRow NgramIds where
fromRow = NgramIds <$> field <*> field fromRow = NgramIds <$> field <*> field
---------------------- ----------------------
insertNgrams :: [(Ngram, SizeN)] -> Cmd [DPS.Only Int] insertNgrams :: [(Ngram, Size)] -> Cmd [NgramIds]
insertNgrams ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns) insertNgrams ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
where where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"] fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
insertNgrams_Debug :: [(Ngram, SizeN)] -> Cmd ByteString insertNgrams_Debug :: [(Ngram, Size)] -> Cmd ByteString
insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns) insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
where where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"] fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
---------------------- ----------------------
queryInsertNgrams :: DPS.Query queryInsertNgrams :: DPS.Query
queryInsertNgrams = [sql| queryInsertNgrams = [sql|
...@@ -114,4 +113,3 @@ queryInsertNgrams = [sql| ...@@ -114,4 +113,3 @@ queryInsertNgrams = [sql|
FROM input_rows FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index JOIN ngrams c USING (terms); -- columns of unique index
|] |]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment