Commit ca8ff794 authored by Alexandre Delanoë

[DBFLOW] NodeNgrams insertion with typed ngrams.

parent cd3c7b82
......@@ -39,8 +39,9 @@ import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..))
import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT)
import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId)
type UserId = Int
type RootId = Int
......@@ -128,25 +129,33 @@ documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
-- Pair every document with the ngrams map that @f@ computes from its 'documentData'.
documentIdWithNgrams f = map attachNgrams
  where
    attachNgrams doc = DocumentIdWithNgrams doc (f (documentData doc))
-- | TODO check optimization
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) [(NodeId,Int)]
mapNodeIdNgrams ds = DM.fromListWith (<>) xs
-- Invert the per-document ngrams maps: for every ngram, give the count
-- contributed by each node (document id). Counts that land on the same
-- (ngram, node) pair are added together.
mapNodeIdNgrams :: [DocumentIdWithNgrams] -> Map (NgramsT Ngrams) (Map NodeId Int)
mapNodeIdNgrams docs = DM.map sumPerNode occurrencesByNgram
  where
    -- Second pass: collapse the (node, count) list of one ngram into a map.
    sumPerNode = DM.fromListWith (+)

    -- First pass: gather, per ngram, the list of (node, count) occurrences.
    occurrencesByNgram = DM.fromListWith (<>) occurrences

    occurrences =
      [ (ngram, [(nodeId, count)])
      | doc <- docs
      , let nodeId = (documentId . documentWithId) doc
      , (ngram, count) <- DM.toList (document_ngrams doc)
      ]
indexNgrams :: Map (NgramsT Ngrams ) [(NodeId, Int)]
-> Cmd (Map (NgramsT NgramsIndexed) [(NodeId, Int)])
-- | Replace the ngrams keys of the map by their indexed counterparts.
--
-- The ngrams found in the keys are first passed to 'insertNgrams'; its result
-- is then handed to 'indexNgramsT' to rewrite every key. The per-node counts
-- are carried over untouched.
indexNgrams :: Map (NgramsT Ngrams       ) (Map NodeId Int)
            -> Cmd (Map (NgramsT NgramsIndexed) (Map NodeId Int))
indexNgrams ng2nId = do
  let ngrams = map _ngramsT (DM.keys ng2nId)
  terms2id <- insertNgrams ngrams
  pure (DM.mapKeys (indexNgramsT terms2id) ng2nId)
---- insert to NodeNgram
---- using insertNgrams from
--indexNgram :: Map Ngram (Map NodeId Int) -> Map NgramId (Map NodeId Int)
--indexNgram = undefined
-- | Flatten the indexed ngrams map into one 'NodeNgram' row per
-- (ngram, node) pair and insert the rows with 'insertNodeNgrams'.
--
-- Each row is built without an id ('Nothing'); the count becomes the weight
-- and the ngrams type is encoded with 'ngramsTypeId'.
insertToNodeNgrams :: Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd Int
insertToNodeNgrams m = insertNodeNgrams rows
  where
    rows =
      [ NodeNgram Nothing nodeId ngramId (fromIntegral count) typeId
      | (ng, countsByNode) <- DM.toList m
      , let ngramId = (_ngramsId . _ngramsT) ng
            typeId  = (ngramsTypeId . _ngramsType) ng
      , (nodeId, count) <- DM.toList countsByNode
      ]
-- group Ngrams
-- insert GroupId
-- mk List Group
-- group by fun
-- insertInto NodeNgramsNgrams
-- get data of NgramsTable
-- change List of ngrams
-- group ngrams
......@@ -83,6 +83,11 @@ import qualified Database.PostgreSQL.Simple as DPS
-- ngrams in text (title or abstract) of documents has Terms Type
-- Kind of an ngram; each constructor is given a numeric code by 'ngramsTypeId'.
data NgramsType = Sources | Authors | Terms
-- | Integer code of an ngrams type: 'Terms' is 1, 'Authors' is 2 and
-- 'Sources' is 3.
ngramsTypeId :: NgramsType -> Int
ngramsTypeId ngramsType = case ngramsType of
  Terms   -> 1
  Authors -> 2
  Sources -> 3
type NgramsTerms = Text
type NgramsId = Int
type Size = Int
......
......@@ -23,30 +23,33 @@ commentary with @some markup@.
module Gargantext.Database.NodeNgram where
import Prelude
import Gargantext.Prelude
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Gargantext.Database.Node (mkCmd, Cmd(..))
import Opaleye
data NodeNgramPoly id node_id ngram_id weight
-- | Polymorphic record for one node/ngram association. Every field has its
-- own type parameter so the same shape can be reused for the Opaleye
-- read/write column types and for plain Haskell values.
data NodeNgramPoly id node_id ngram_id weight ngrams_type
  = NodeNgram
      { nodeNgram_NodeNgramId      :: id          -- ^ identifier of the association itself
      , nodeNgram_NodeNgramNodeId  :: node_id     -- ^ node side of the association
      , nodeNgram_NodeNgramNgramId :: ngram_id    -- ^ ngram side of the association
      , nodeNgram_NodeNgramWeight  :: weight      -- ^ weight attached to the association
      , nodeNgram_NodeNgramType    :: ngrams_type -- ^ type of the ngram
      }
  deriving (Show)
type NodeNgramWrite = NodeNgramPoly (Column PGInt4 )
-- | Column types used when writing a row. The id is wrapped in 'Maybe', so a
-- row can be written without supplying one.
type NodeNgramWrite =
  NodeNgramPoly
    (Maybe (Column PGInt4))
    (Column PGInt4)
    (Column PGInt4)
    (Column PGFloat8)
    (Column PGInt4)
-- | Column types obtained when reading a row: every field, including the id,
-- is a plain column.
type NodeNgramRead =
  NodeNgramPoly
    (Column PGInt4)
    (Column PGInt4)
    (Column PGInt4)
    (Column PGFloat8)
    (Column PGInt4)
type NodeNgram = NodeNgramPoly Int Int Int Double
-- | Plain Haskell view of a row: optional id, node id, ngram id, weight and
-- ngrams type code.
type NodeNgram = NodeNgramPoly (Maybe Int) Int Int Double Int
-- Template Haskell: generate the "pNodeNgram" adaptor (used by the table
-- definition) and the accompanying instance for 'NodeNgramPoly'.
$(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
-- Template Haskell: generate lenses for the record fields, named according to
-- the 'abbreviatedFields' rules.
$(makeLensesWith abbreviatedFields ''NodeNgramPoly)
......@@ -54,13 +57,21 @@ $(makeLensesWith abbreviatedFields ''NodeNgramPoly)
-- | Opaleye description of the "nodes_ngrams" table: each record field of
-- 'NodeNgramPoly' is bound to the column of the given name.
nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
nodeNgramTable = Table "nodes_ngrams" ( pNodeNgram NodeNgram
{ nodeNgram_NodeNgramId = required "id"
, nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramWeight = required "weight"
{ nodeNgram_NodeNgramId = optional "id"
, nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramWeight = required "weight"
, nodeNgram_NodeNgramType = required "ngrams_type"
}
)
-- | Query over 'nodeNgramTable', built with 'queryTable'.
queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = queryTable nodeNgramTable
-- | Convert plain 'NodeNgram' values to their column representation and
-- insert them with 'insertNodeNgramW'.
--
-- The id carried by each input value is deliberately ignored: every row is
-- written with 'Nothing' as its id. The result is whatever
-- 'insertNodeNgramW' returns for the converted rows.
insertNodeNgrams :: [NodeNgram] -> Cmd Int
insertNodeNgrams nns = insertNodeNgramW (map toWrite nns)
  where
    toWrite :: NodeNgram -> NodeNgramWrite
    -- The id is matched with a wildcard because it is never used; binding it
    -- to a name only produced an unused-match warning.
    toWrite (NodeNgram _ n g w t) =
      NodeNgram Nothing (pgInt4 n) (pgInt4 g) (pgDouble w) (pgInt4 t)
-- | Run 'runInsertMany' for the given rows against 'nodeNgramTable' and
-- return its result converted to 'Int'.
insertNodeNgramW :: [NodeNgramWrite] -> Cmd Int
insertNodeNgramW nns = mkCmd insertRows
  where
    insertRows conn = fromIntegral <$> runInsertMany conn nodeNgramTable nns
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment