Commit a4fb6705 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FLOW][DB][NGRAMS] group inserted in db.

parent 7c083bed
......@@ -63,7 +63,7 @@ library:
- Gargantext.Text.Terms.Multi.Lang.Fr
- Gargantext.Text.Terms.Multi.RAKE
- Gargantext.Text.Terms.WithList
- Gargantext.TextFlow
- Gargantext.Text.Flow
- Gargantext.Viz.Graph
- Gargantext.Viz.Graph.Distances.Matrice
- Gargantext.Viz.Graph.Index
......
......@@ -40,10 +40,7 @@ nodeTypeId n =
--NodeSwap -> 19
---- Lists
-- StopList -> 5
-- GroupList -> 6
-- MainList -> 7
-- MapList -> 8
NodeList -> 5
---- Scores
-- NodeOccurrences -> 10
......
......@@ -26,20 +26,22 @@ module Gargantext.Database.Flow
where
import System.FilePath (FilePath)
import Data.Maybe (Maybe(..))
import Data.Text (Text, unpack)
import Data.Text (Text)
import Data.Map (Map)
import Data.Tuple.Extra (both)
import qualified Data.Map as DM
import GHC.Generics (Generic)
import Gargantext.Core.Types (NodePoly(..))
import Gargantext.Prelude
import Gargantext.Database.Bashql (runCmd', del)
import Gargantext.Database.Types.Node (Node(..), HyperdataDocument(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList)
import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIds)
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.NodeNgramNgram (NodeNgramNgramPoly(..), insertNodeNgramNgram)
import Gargantext.Text.Parsers (parseDocs, FileFormat(WOS))
import Gargantext.Database.Ngram (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId)
......@@ -153,13 +155,22 @@ insertToNodeNgrams m = insertNodeNgrams $ [ NodeNgram Nothing nId ((_ngramsId
]
------------------------------------------------------------------------
groupNgramsBy :: fun
groupNgramsBy :: (Ngrams -> Ngrams -> Bool) -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Map NgramsIndexed NgramsIndexed
groupNgramsBy = undefined
insertGroups :: ListId -> Map NgramsIndexed NgramsIndexed -> Cmd Int
insertGroups lId ngrs =
insertNodeNgramNgram $ [ NodeNgramNgram lId ng1 ng2 (Just 1)
| (ng1, ng2) <- map (both _ngramsId) $ DM.toList ngrs
]
listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd [ListId]
listFlow :: UserId -> CorpusId -> Map (NgramsT NgramsIndexed) (Map NodeId Int) -> Cmd ListId
listFlow uId cId ng = do
lId <- mkList cId uId
-- insertGroups = NodeNgramsNgrams
lId <- maybe (panic "mkList error") identity <$> head <$> mkList cId uId
-- TODO add stemming equivalence of 2 ngrams
let groupEd = groupNgramsBy (==) ng
_ <- insertGroups lId groupEd
-- compute Candidate / Map
-- ALTER TABLE nodes_nodes_ngrams ADD COLUMN typelist int;
......@@ -168,7 +179,6 @@ listFlow uId cId ng = do
pure lId
-- | TODO ask on meeting
-- get data of NgramsTable
-- post :: update NodeNodeNgrams
......
......@@ -25,21 +25,16 @@ module Gargantext.Database.Ngram where
-- import Opaleye
import Control.Lens (makeLenses)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.ByteString.Internal (ByteString)
import Data.List (find)
import Data.Map (Map, fromList, lookup)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Text (Text)
import Database.PostgreSQL.Simple.FromField ( FromField, fromField)
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.ToField (toField)
import Database.PostgreSQL.Simple.ToRow (toRow)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import GHC.Generics (Generic)
import Gargantext.Database.Node (runCmd, mkCmd, Cmd(..))
import Gargantext.Database.Node (mkCmd, Cmd(..))
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
......@@ -108,38 +103,46 @@ instance DPS.ToRow Ngrams where
-------------------------------------------------------------------------
-- | TODO put it in Gargantext.Text.Ngrams
-- Named entity are typed ngrams of Terms Ngrams
data NgramsT a = NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic)
data NgramsT a =
NgramsT { _ngramsType :: NgramsType
, _ngramsT :: a
} deriving (Generic)
instance Eq (NgramsT a) where (==) = (==)
instance Ord (NgramsT a) where compare = compare
makeLenses ''NgramsT
-----------------------------------------------------------------------
data NgramsIndexed = NgramsIndexed { _ngrams :: Ngrams
, _ngramsId :: NgramsId
} deriving (Generic)
data NgramsIndexed =
NgramsIndexed
{ _ngrams :: Ngrams
, _ngramsId :: NgramsId
} deriving (Generic)
instance Eq NgramsIndexed where
(==) = (==)
instance Ord NgramsIndexed where
compare = compare
makeLenses ''NgramsIndexed
------------------------------------------------------------------------
data NgramIds = NgramIds { ngramId :: Int
, ngramTerms :: Text
} deriving (Show, Generic)
data NgramIds =
NgramIds
{ ngramId :: Int
, ngramTerms :: Text
} deriving (Show, Generic)
instance DPS.FromRow NgramIds where
fromRow = NgramIds <$> field <*> field
----------------------
indexNgramsT :: Map NgramsTerms NgramsId -> NgramsT Ngrams -> NgramsT NgramsIndexed
indexNgramsT m n = indexNgramsTWith f n
indexNgramsT m ngrId = indexNgramsTWith f ngrId
where
f n = maybe (panic "indexNgramsT: should not happen") identity (lookup n m)
indexNgramsTWith :: (NgramsTerms -> NgramsId) -> NgramsT Ngrams-> NgramsT NgramsIndexed
indexNgramsTWith f (NgramsT t n) = NgramsT t (NgramsIndexed n ((f . _ngramsTerms) n))
----------------------
insertNgrams :: [Ngrams] -> Cmd (Map NgramsTerms NgramsId)
insertNgrams ns = fromList <$> map (\(NgramIds i t) -> (t, i)) <$> (insertNgrams' ns)
......
......@@ -30,40 +30,46 @@ import Gargantext.Database.Node (mkCmd, Cmd(..))
import Opaleye
data NodeNgramPoly id node_id ngram_id weight ngrams_type
= NodeNgram { nodeNgram_NodeNgramId :: id
, nodeNgram_NodeNgramNodeId :: node_id
, nodeNgram_NodeNgramNgramId :: ngram_id
, nodeNgram_NodeNgramWeight :: weight
, nodeNgram_NodeNgramType :: ngrams_type
} deriving (Show)
type NodeNgramWrite = NodeNgramPoly (Maybe (Column PGInt4 ))
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
(Column PGInt4 )
type NodeNgramRead = NodeNgramPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
(Column PGInt4 )
type NodeNgram = NodeNgramPoly (Maybe Int) Int Int Double Int
= NodeNgram { nodeNgram_NodeNgramId :: id
, nodeNgram_NodeNgramNodeId :: node_id
, nodeNgram_NodeNgramNgramId :: ngram_id
, nodeNgram_NodeNgramWeight :: weight
, nodeNgram_NodeNgramType :: ngrams_type
} deriving (Show)
type NodeNgramWrite =
NodeNgramPoly
(Maybe (Column PGInt4 ))
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
(Column PGInt4 )
type NodeNgramRead =
NodeNgramPoly
(Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
(Column PGInt4 )
type NodeNgram =
NodeNgramPoly (Maybe Int) Int Int Double Int
$(makeAdaptorAndInstance "pNodeNgram" ''NodeNgramPoly)
$(makeLensesWith abbreviatedFields ''NodeNgramPoly)
nodeNgramTable :: Table NodeNgramWrite NodeNgramRead
nodeNgramTable = Table "nodes_ngrams" ( pNodeNgram NodeNgram
{ nodeNgram_NodeNgramId = optional "id"
, nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramWeight = required "weight"
, nodeNgram_NodeNgramType = required "ngrams_type"
}
)
nodeNgramTable = Table "nodes_ngrams"
( pNodeNgram NodeNgram
{ nodeNgram_NodeNgramId = optional "id"
, nodeNgram_NodeNgramNodeId = required "node_id"
, nodeNgram_NodeNgramNgramId = required "ngram_id"
, nodeNgram_NodeNgramWeight = required "weight"
, nodeNgram_NodeNgramType = required "ngrams_type"
}
)
queryNodeNgramTable :: Query NodeNgramRead
queryNodeNgramTable = queryTable nodeNgramTable
......@@ -76,6 +82,7 @@ insertNodeNgrams = insertNodeNgramW
)
insertNodeNgramW :: [NodeNgramWrite] -> Cmd Int
insertNodeNgramW nns = mkCmd $ \c -> fromIntegral <$> runInsertMany c nodeNgramTable nns
insertNodeNgramW nns =
mkCmd $ \c -> fromIntegral
<$> runInsertMany c nodeNgramTable nns
......@@ -8,11 +8,14 @@ Stability : experimental
Portability : POSIX
NodeNgramNgram table is used to group Ngrams
- NodeId :: List Id
- NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
- weight: probability of the relation (TODO, fixed to 1 for simple stemming)
Next Step:
Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
......@@ -23,15 +26,16 @@ Next Step:
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.NodeNgramNgram where
module Gargantext.Database.NodeNgramNgram
where
import Gargantext.Prelude
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import qualified Database.PostgreSQL.Simple as PGS
import Gargantext.Database.Node (mkCmd, Cmd(..))
import Gargantext.Prelude
import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight =
NodeNgramNgram { nng_NodeId :: node_id
......@@ -42,7 +46,7 @@ data NodeNgramNgramPoly node_id ngram1_id ngram2_id weight =
type NodeNgramNgramWrite =
NodeNgramNgramPoly (Maybe (Column PGInt4 ))
NodeNgramNgramPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Maybe (Column PGFloat8))
......@@ -54,10 +58,10 @@ type NodeNgramNgramRead =
(Column PGFloat8)
type NodeNgramNgram =
NodeNgramNgramPoly (Maybe Int )
Int
Int
(Maybe Double)
NodeNgramNgramPoly Int
Int
Int
(Maybe Double)
$(makeAdaptorAndInstance "pNodeNgramNgram"
''NodeNgramNgramPoly)
......@@ -69,7 +73,7 @@ nodeNgramNgramTable :: Table NodeNgramNgramWrite NodeNgramNgramRead
nodeNgramNgramTable =
Table "nodes_ngrams_ngrams"
( pNodeNgramNgram NodeNgramNgram
{ nng_NodeId = optional "node_id"
{ nng_NodeId = required "node_id"
, nng_Ngram1Id = required "ngram1_id"
, nng_Ngram2Id = required "ngram2_id"
, nng_Weight = optional "weight"
......@@ -79,9 +83,10 @@ nodeNgramNgramTable =
queryNodeNgramNgramTable :: Query NodeNgramNgramRead
queryNodeNgramNgramTable = queryTable nodeNgramNgramTable
-- | not optimized (get all ngrams without filters)
nodeNgramNgrams :: PGS.Connection -> IO [NodeNgramNgram]
nodeNgramNgrams conn = runQuery conn queryNodeNgramNgramTable
-- | Select NodeNgramNgram
-- TODO not optimized (get all ngrams without filters)
nodeNgramNgram :: PGS.Connection -> IO [NodeNgramNgram]
nodeNgramNgram conn = runQuery conn queryNodeNgramNgramTable
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
......@@ -90,3 +95,19 @@ instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
insertNodeNgramNgram :: [NodeNgramNgram] -> Cmd Int
insertNodeNgramNgram = insertNodeNgramNgramW
. map (\(NodeNgramNgram n ng1 ng2 maybeWeight) ->
NodeNgramNgram (pgInt4 n)
(pgInt4 ng1)
(pgInt4 ng2)
(pgDouble <$> maybeWeight)
)
insertNodeNgramNgramW :: [NodeNgramNgramWrite] -> Cmd Int
insertNodeNgramNgramW ns =
mkCmd $ \c -> fromIntegral
<$> runInsertMany c nodeNgramNgramTable ns
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment