Commit a4fb6705 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW][DB][NGRAMS] group inserted in db.

parent 7c083bed
...@@ -63,7 +63,7 @@ library: ...@@ -63,7 +63,7 @@ library:
- Gargantext.Text.Terms.Multi.Lang.Fr - Gargantext.Text.Terms.Multi.Lang.Fr
- Gargantext.Text.Terms.Multi.RAKE - Gargantext.Text.Terms.Multi.RAKE
- Gargantext.Text.Terms.WithList - Gargantext.Text.Terms.WithList
- Gargantext.TextFlow - Gargantext.Text.Flow
- Gargantext.Viz.Graph - Gargantext.Viz.Graph
- Gargantext.Viz.Graph.Distances.Matrice - Gargantext.Viz.Graph.Distances.Matrice
- Gargantext.Viz.Graph.Index - Gargantext.Viz.Graph.Index
......
...@@ -40,10 +40,7 @@ nodeTypeId n = ...@@ -40,10 +40,7 @@ nodeTypeId n =
--NodeSwap -> 19 --NodeSwap -> 19
---- Lists ---- Lists
-- StopList -> 5 NodeList -> 5
-- GroupList -> 6
-- MainList -> 7
-- MapList -> 8
---- Scores ---- Scores
-- NodeOccurrences -> 10 -- NodeOccurrences -> 10
......
...@@ -26,20 +26,22 @@ module Gargantext.Database.Flow ...@@ -26,20 +26,22 @@ module Gargantext.Database.Flow
where where
import System.FilePath (FilePath) import System.FilePath (FilePath)
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Text (Text, unpack) import Data.Text (Text)
import Data.Map (Map) import Data.Map (Map)
import Data.Tuple.Extra (both)
import qualified Data.Map as DM import qualified Data.Map as DM
import GHC.Generics (Generic)
import Gargantext.Core.Types (NodePoly(..)) import Gargantext.Core.Types (NodePoly(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Bashql (runCmd', del) import Gargantext.Database.Bashql (runCmd', del)
import Gargantext.Database.Types.Node (Node(..), HyperdataDocument(..)) import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList) import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList)
import Gargantext.Database.User (getUser, UserLight(..), Username) import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams) import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.NodeNgramNgram (NodeNgramNgramPoly(..), insertNodeNgramNgram)
import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS)) import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId) import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId)
...@@ -153,13 +155,22 @@ insertToNodeNgrams m = insertNodeNgrams $ [ NodeNgram Nothing nId ((_ngramsId ...@@ -153,13 +155,22 @@ insertToNodeNgrams m = insertNodeNgrams $ [ NodeNgram Nothing nId ((_ngramsId
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
groupNgramsBy :: fun groupNgramsBy :: (Ngrams -> Ngrams -> Bool) -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Map NgramsIndexed NgramsIndexed
groupNgramsBy = undefined
insertGroups :: ListId -> Map NgramsIndexed NgramsIndexed -> Cmd Int
insertGroups lId ngrs =
insertNodeNgramNgram $ [ NodeNgramNgram lId ng1 ng2 (Just 1)
| (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs
]
listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd [ListId] listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd ListId
listFlow uId cId ng = do listFlow uId cId ng = do
lId <- mkList cId uId lId <- maybe (panic "mkList error") identity <$> head <$> mkList cId uId
-- insertGroups = NodeNgramsNgrams -- TODO add stemming equivalence of 2 ngrams
let groupEd = groupNgramsBy (==) ng
_ <- insertGroups lId groupEd
-- compute Candidate / Map -- compute Candidate / Map
-- ALTER TABLE nodes_nodes_ngrams ADD COLUMN typelist int; -- ALTER TABLE nodes_nodes_ngrams ADD COLUMN typelist int;
...@@ -168,7 +179,6 @@ listFlow uId cId ng = do ...@@ -168,7 +179,6 @@ listFlow uId cId ng = do
pure lId pure lId
-- | TODO ask on meeting -- | TODO ask on meeting
-- get data of NgramsTable -- get data of NgramsTable
-- post :: update NodeNodeNgrams -- post :: update NodeNodeNgrams
......
...@@ -25,21 +25,16 @@ module Gargantext.Database.Ngram where ...@@ -25,21 +25,16 @@ module Gargantext.Database.Ngram where
-- import Opaleye -- import Opaleye
import Control.Lens (makeLenses) import Control.Lens (makeLenses)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.ByteString.Internal (ByteString) import Data.ByteString.Internal (ByteString)
import Data.List (find)
import Data.Map (Map, fromList, lookup) import Data.Map (Map, fromList, lookup)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Text (Text) import Data.Text (Text)
import Database.PostgreSQL.Simple.FromField ( FromField, fromField)
import Database.PostgreSQL.Simple.FromRow (fromRow, field) import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField (toField) import Database.PostgreSQL.Simple.ToField (toField)
import Database.PostgreSQL.Simple.ToRow (toRow) import Database.PostgreSQL.Simple.ToRow (toRow)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Database.Node (runCmd, mkCmd, Cmd(..)) import Gargantext.Database.Node (mkCmd, Cmd(..))
import Gargantext.Prelude import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
...@@ -108,38 +103,46 @@ instance DPS.ToRow Ngrams where ...@@ -108,38 +103,46 @@ instance DPS.ToRow Ngrams where
------------------------------------------------------------------------- -------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams -- | TODO put it in Gargantext.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams -- Named entity are typed ngrams of Terms Ngrams
data NgramsT a = NgramsT { _ngramsType :: NgramsType data NgramsT a =
, _ngramsT :: a NgramsT { _ngramsType :: NgramsType
} deriving (Generic) , _ngramsT :: a
} deriving (Generic)
instance Eq (NgramsT a) where (==) = (==) instance Eq (NgramsT a) where (==) = (==)
instance Ord (NgramsT a) where compare = compare instance Ord (NgramsT a) where compare = compare
makeLenses ''NgramsT makeLenses ''NgramsT
----------------------------------------------------------------------- -----------------------------------------------------------------------
data NgramsIndexed = NgramsIndexed { _ngrams :: Ngrams data NgramsIndexed =
, _ngramsId :: NgramsId NgramsIndexed
} deriving (Generic) { _ngrams :: Ngrams
, _ngramsId :: NgramsId
} deriving (Generic)
instance Eq NgramsIndexed where instance Eq NgramsIndexed where
(==) = (==) (==) = (==)
instance Ord NgramsIndexed where instance Ord NgramsIndexed where
compare = compare compare = compare
makeLenses ''NgramsIndexed makeLenses ''NgramsIndexed
------------------------------------------------------------------------ ------------------------------------------------------------------------
data NgramIds = NgramIds { ngramId :: Int data NgramIds =
, ngramTerms :: Text NgramIds
} deriving (Show, Generic) { ngramId :: Int
, ngramTerms :: Text
} deriving (Show, Generic)
instance DPS.FromRow NgramIds where instance DPS.FromRow NgramIds where
fromRow = NgramIds <$> field <*> field fromRow = NgramIds <$> field <*> field
---------------------- ----------------------
indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
indexNgramsT m n = indexNgramsTWith f n indexNgramsT m ngrId = indexNgramsTWith f ngrId
where where
f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m) f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n)) indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
----------------------
insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId) insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns) insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
......
...@@ -30,40 +30,46 @@ import Gargantext.Database.Node (mkCmd, Cmd(..)) ...@@ -30,40 +30,46 @@ import Gargantext.Database.Node (mkCmd, Cmd(..))
import Opaleye import Opaleye
data NodeNgramPoly id node_id ngram_id weight ngrams_type data NodeNgramPoly id node_id ngram_id weight ngrams_type
= NodeNgram { nodeNgram_NodeNgramId :: id = NodeNgram { nodeNgram_NodeNgramId :: id
, nodeNgram_NodeNgramNodeId :: node_id , nodeNgram_NodeNgramNodeId :: node_id
, nodeNgram_NodeNgramNgramId :: ngram_id , nodeNgram_NodeNgramNgramId :: ngram_id
, nodeNgram_NodeNgramWeight :: weight , nodeNgram_NodeNgramWeight :: weight
, nodeNgram_NodeNgramType :: ngrams_type , nodeNgram_NodeNgramType :: ngrams_type
} deriving (Show) } deriving (Show)
type NodeNgramWrite = NodeNgramPoly (Maybe (Column PGInt4 )) type NodeNgramWrite =
(Column PGInt4 ) NodeNgramPoly
(Column PGInt4 ) (Maybe (Column PGInt4 ))
(Column PGFloat8) (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Column PGFloat8)
type NodeNgramRead = NodeNgramPoly (Column PGInt4 ) (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 ) type NodeNgramRead =
(Column PGFloat8) NodeNgramPoly
(Column PGInt4 ) (Column PGInt4 )
(Column PGInt4 )
type NodeNgram = NodeNgramPoly (Maybe Int) Int Int Double Int (Column PGInt4 )
(Column PGFloat8)
(Column PGInt4 )
type NodeNgram =
NodeNgramPoly (Maybe Int) Int Int Double Int
$(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly) $(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramPoly) $(makeLensesWith abbreviatedFields ''NodeNgramPoly)
nodeNgramTable :: Table NodeNgramWrite NodeNgramRead nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
nodeNgramTable = Table "nodes_ngrams" ( pNodeNgram NodeNgram nodeNgramTable = Table "nodes_ngrams"
{ nodeNgram_NodeNgramId = optional "id" ( pNodeNgram NodeNgram
, nodeNgram_NodeNgramNodeId = required "node_id" { nodeNgram_NodeNgramId = optional "id"
, nodeNgram_NodeNgramNgramId = required "ngram_id" , nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramWeight = required "weight" , nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramType = required "ngrams_type" , nodeNgram_NodeNgramWeight = required "weight"
} , nodeNgram_NodeNgramType = required "ngrams_type"
) }
)
queryNodeNgramTable :: Query NodeNgramRead queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = queryTable nodeNgramTable queryNodeNgramTable = queryTable nodeNgramTable
...@@ -76,6 +82,7 @@ insertNodeNgrams = insertNodeNgramW ...@@ -76,6 +82,7 @@ insertNodeNgrams = insertNodeNgramW
) )
insertNodeNgramW :: [NodeNgramWrite] -> Cmd Int insertNodeNgramW :: [NodeNgramWrite] -> Cmd Int
insertNodeNgramW nns = mkCmd $ \c -> fromIntegral <$> runInsertMany c nodeNgramTable nns insertNodeNgramW nns =
mkCmd $ \c -> fromIntegral
<$> runInsertMany c nodeNgramTable nns
...@@ -8,11 +8,14 @@ Stability : experimental ...@@ -8,11 +8,14 @@ Stability : experimental
Portability : POSIX Portability : POSIX
NodeNgramNgram table is used to group Ngrams NodeNgramNgram table is used to group Ngrams
- NodeId :: List Id
- NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
- weight: probability of the relation (TODO, fixed to 1 for simple stemming)
Next Step: Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model - group with: https://en.wikipedia.org/wiki/Nested_set_model
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
...@@ -23,15 +26,16 @@ Next Step: ...@@ -23,15 +26,16 @@ Next Step:
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.NodeNgramNgram where module Gargantext.Database.NodeNgramNgram
where
import Gargantext.Prelude import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.Maybe (Maybe) import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields) import Gargantext.Database.Node (mkCmd, Cmd(..))
import qualified Database.PostgreSQL.Simple as PGS import Gargantext.Prelude
import Opaleye import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight = data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight =
NodeNgramNgram { nng_NodeId :: node_id NodeNgramNgram { nng_NodeId :: node_id
...@@ -42,7 +46,7 @@ data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight = ...@@ -42,7 +46,7 @@ data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight =
type NodeNgramNgramWrite = type NodeNgramNgramWrite =
NodeNgramNgramPoly (Maybe (Column PGInt4 )) NodeNgramNgramPoly (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Column PGInt4 ) (Column PGInt4 )
(Maybe (Column PGFloat8)) (Maybe (Column PGFloat8))
...@@ -54,10 +58,10 @@ type NodeNgramNgramRead = ...@@ -54,10 +58,10 @@ type NodeNgramNgramRead =
(Column PGFloat8) (Column PGFloat8)
type NodeNgramNgram = type NodeNgramNgram =
NodeNgramNgramPoly (Maybe Int ) NodeNgramNgramPoly Int
Int Int
Int Int
(Maybe Double) (Maybe Double)
$(makeAdaptorAndInstance "pNodeNgramNgram" $(makeAdaptorAndInstance "pNodeNgramNgram"
''NodeNgramNgramPoly) ''NodeNgramNgramPoly)
...@@ -69,7 +73,7 @@ nodeNgramNgramTable :: Table NodeNgramNgramWrite NodeNgramNgramRead ...@@ -69,7 +73,7 @@ nodeNgramNgramTable :: Table NodeNgramNgramWrite NodeNgramNgramRead
nodeNgramNgramTable = nodeNgramNgramTable =
Table "nodes_ngrams_ngrams" Table "nodes_ngrams_ngrams"
( pNodeNgramNgram NodeNgramNgram ( pNodeNgramNgram NodeNgramNgram
{ nng_NodeId = optional "node_id" { nng_NodeId = required "node_id"
, nng_Ngram1Id = required "ngram1_id" , nng_Ngram1Id = required "ngram1_id"
, nng_Ngram2Id = required "ngram2_id" , nng_Ngram2Id = required "ngram2_id"
, nng_Weight = optional "weight" , nng_Weight = optional "weight"
...@@ -79,9 +83,10 @@ nodeNgramNgramTable = ...@@ -79,9 +83,10 @@ nodeNgramNgramTable =
queryNodeNgramNgramTable :: Query NodeNgramNgramRead queryNodeNgramNgramTable :: Query NodeNgramNgramRead
queryNodeNgramNgramTable = queryTable nodeNgramNgramTable queryNodeNgramNgramTable = queryTable nodeNgramNgramTable
-- | not optimized (get all ngrams without filters) -- | Select NodeNgramNgram
nodeNgramNgrams :: PGS.Connection -> IO [NodeNgramNgram] -- TODO not optimized (get all ngrams without filters)
nodeNgramNgrams conn = runQuery conn queryNodeNgramNgramTable nodeNgramNgram :: PGS.Connection -> IO [NodeNgramNgram]
nodeNgramNgram conn = runQuery conn queryNodeNgramNgramTable
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
...@@ -90,3 +95,19 @@ instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where ...@@ -90,3 +95,19 @@ instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
insertNodeNgramNgram :: [NodeNgramNgram] -> Cmd Int
insertNodeNgramNgram = insertNodeNgramNgramW
. map (\(NodeNgramNgram n ng1 ng2 maybeWeight) ->
NodeNgramNgram (pgInt4 n)
(pgInt4 ng1)
(pgInt4 ng2)
(pgDouble <$> maybeWeight)
)
insertNodeNgramNgramW :: [NodeNgramNgramWrite] -> Cmd Int
insertNodeNgramNgramW ns =
mkCmd $ \c -> fromIntegral
<$> runInsertMany c nodeNgramNgramTable ns
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment