Commit 986a253c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DBFLOW] Ngrams, NgramsIndexed, NgramsT a.

parent f6a473f2
......@@ -18,16 +18,20 @@ authors
-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Flow
where
import System.FilePath (FilePath)
import Control.Lens (makeLenses)
import Data.Maybe (Maybe(..))
import Data.Text (Text, unpack)
import Data.Map (Map)
import qualified Data.Map as DM
import GHC.Generics (Generic)
import Gargantext.Core.Types (NodePoly(..))
import Gargantext.Prelude
......@@ -38,6 +42,7 @@ import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
import Gargantext.Database.Ngram (insertNgrams, NgramIds(..))
type UserId = Int
type RootId = Int
......@@ -109,39 +114,66 @@ data DocumentWithId = DocumentWithId { documentId :: NodeId
type NodeId = Int
mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument
-> [DocumentWithId]
mergeData :: Map HashId ReturnId -> Map HashId HyperdataDocument -> [DocumentWithId]
mergeData rs hs = map (\(hash,r) -> DocumentWithId (reId r) (lookup' hash hs)) $ DM.toList rs
where
lookup' h xs = maybe (panic $ "Error with " <> h) identity (DM.lookup h xs)
-- | Main Ngrams Types
-- | Typed Ngrams
-- Typed Ngrams localize the context of the ngrams
-- ngrams in source field of document has Sources Type
-- ngrams in authors field of document has Authors Type
-- ngrams in text (title or abstract) of documents has Terms Type
data NgramsType = Sources | Authors | Terms
data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId
, document_ngrams :: Map Ngram Int
}
type NgramId = Int
data NgramsType = Sources | Authors | Terms
data Ngrams = Ngrams { _ngramsTerms :: Text
, _ngramsSize :: Int
} deriving (Generic)
instance Eq Ngrams where
(==) = (==)
instance Ord Ngrams where
compare = compare
makeLenses ''Ngrams
data NgramsIndexed = NgramsIndexed { _ngrams :: Ngrams
, _ngramsId :: NgramId
} deriving (Generic)
instance Eq NgramsIndexed where
(==) = (==)
instance Ord NgramsIndexed where
compare = compare
makeLenses ''NgramsIndexed
data NgramsT a = NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic)
instance Eq (NgramsT a) where (==) = (==)
instance Ord (NgramsT a) where compare = compare
makeLenses ''NgramsT
data DocumentIdWithNgrams = DocumentIdWithNgrams { documentWithId :: DocumentWithId
, document_ngrams :: Map (NgramsT Ngrams)Int
}
-- | Typed Ngrams
data Ngrams = Ngrams { ngramsType :: NgramsType
, ngramsText :: Text
, ngramsSize :: Int
}
type Ngram = Text
type NgramId = Int
documentIdWithNgrams :: (HyperdataDocument -> Map Ngram Int) -> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int) -> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
-- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map Ngram [(NodeId,Int)]
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) [(NodeId,Int)]
mapNodeIdNgrams ds = DM.fromListWith (<>) xs
where
xs = [(ngId, [(nId, i)]) | (nId, n2i') <- n2i ds, (ngId, i) <- DM.toList n2i']
xs = [(ng, [(nId, i)]) | (nId, n2i') <- n2i ds, (ng, i) <- DM.toList n2i']
n2i = map (\d -> ((documentId . documentWithId) d, document_ngrams d))
--indexNgram :: [Ngram] ->
indexNgrams :: Map (NgramsT Ngrams ) [(NodeId, Int)]
-> IO (Map (NgramsT NgramsIndexed) [(NodeId, Int)])
indexNgrams ng2nId = undefined
--let keys = DM.keys ng2nId
......@@ -150,5 +182,7 @@ mapNodeIdNgrams ds = DM.fromListWith (<>) xs
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined
-- grouping here
-- group Ngrams
-- insert Groups
......@@ -37,6 +37,7 @@ import Gargantext.Database.Node (mkCmd, Cmd(..))
import Prelude
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.FromField ( FromField, fromField)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
......@@ -73,9 +74,9 @@ import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
type Ngram = Text
type NgramId = Int
type SizeN = Int
type Size = Int
data NgramIds = NgramIds { ngramId :: Int
data NgramIds = NgramIds { ngramId :: Int
, ngramTerms :: Text
} deriving (Show, Generic)
......@@ -83,19 +84,17 @@ instance DPS.FromRow NgramIds where
fromRow = NgramIds <$> field <*> field
----------------------
insertNgrams :: [(Ngram, SizeN)] -> Cmd [DPS.Only Int]
insertNgrams :: [(Ngram, Size)] -> Cmd [NgramIds]
insertNgrams ns = mkCmd $ \conn -> DPS.query conn queryInsertNgrams (DPS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
insertNgrams_Debug :: [(Ngram, SizeN)] -> Cmd ByteString
insertNgrams_Debug :: [(Ngram, Size)] -> Cmd ByteString
insertNgrams_Debug ns = mkCmd $ \conn -> DPS.formatQuery conn queryInsertNgrams (DPS.Only $ Values fields ns)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4"]
----------------------
queryInsertNgrams :: DPS.Query
queryInsertNgrams = [sql|
......@@ -114,4 +113,3 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment