Commit e50ae8f8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Annuaire] flow to insert in DB.

parent 28e68956
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Database.Flow (flowDatabase, ngrams2list) module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where where
import GHC.Show (Show) import GHC.Show (Show)
...@@ -29,88 +29,99 @@ import Gargantext.Database.Bashql (runCmd') -- , del) ...@@ -29,88 +29,99 @@ import Gargantext.Database.Bashql (runCmd') -- , del)
import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName) import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName)
import Gargantext.Database.Ngrams (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId, NgramsType(..), text2ngrams) import Gargantext.Database.Ngrams (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId, NgramsType(..), text2ngrams)
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList, mkGraph, mkDashboard)--, mkAnnuaire) import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList, mkGraph, mkDashboard)--, mkAnnuaire)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Node.Document.Add (add) import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, ToDbData(..)) import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams) import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew) import Gargantext.Database.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
import Gargantext.Database.Types.Node (HyperdataDocument(..)) import Gargantext.Database.Types.Node (HyperdataDocument(..))
-- import Gargantext.Database.Node.Contact (HyperdataContact(..)) import Gargantext.Database.Node.Contact (HyperdataContact(..))
import Gargantext.Database.User (getUser, UserLight(..), Username) import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Ext.IMT (toSchoolName) import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Parsers (parseDocs, FileFormat) import Gargantext.Text.Parsers (parseDocs, FileFormat)
type UserId = Int type UserId = Int
type RootId = Int type MasterUserId = Int
type RootId = Int
type CorpusId = Int type CorpusId = Int
{-
flowCorpus :: [ToDbData] -> CorpusName -> IO CorpusId
flowCorpus = undefined
--}
flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO Int flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO CorpusId
flowDatabase ff fp cName = do flowDatabase ff fp cName = do
-- Corpus Flow -- Corpus Flow
(masterUserId, _, corpusId) <- subFlowCorpus userMaster corpusMasterName
-- Documents Flow
hyperdataDocuments <- map addUniqIdsDoc <$> parseDocs ff fp hyperdataDocuments <- map addUniqIdsDoc <$> parseDocs ff fp
let hyperdataDocuments' = map (\h -> ToDbDocument h) hyperdataDocuments params <- flowInsert NodeCorpus hyperdataDocuments cName
printDebug "hyperdataDocuments" (length hyperdataDocuments) flowCorpus NodeCorpus hyperdataDocuments params
flowInsert :: NodeType -> [HyperdataDocument] -> CorpusName
-> IO ([ReturnId], MasterUserId, UserId, CorpusId)
flowInsert nt hyperdataDocuments cName = do
let hyperdataDocuments' = case nt of
NodeCorpus -> map (\h -> ToDbDocument h) hyperdataDocuments
-- NodeAnnuaire -> map (\h -> ToDbContact h) hyperdataDocuments
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
ids <- runCmd' $ insertDocuments masterUserId masterCorpusId hyperdataDocuments'
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary cName
_ <- runCmd' $ add userCorpusId (map reId ids)
pure (ids, masterUserId, userId, userCorpusId)
ids <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let idsNotRepeated = filter (\r -> reInserted r == True) ids
--{- --{-
-- Ngrams Flow flowInsertAnnuaire children name = do
-- todo: flow for new documents only
let tids = toInserted ids (masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
printDebug "toInserted ids" (length tids) ids <- runCmd' $ insertDocuments masterUserId masterCorpusId children
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary name
_ <- runCmd' $ add userCorpusId (map reId ids)
pure (ids, masterUserId, userId, userCorpusId)
let tihs = toInsert hyperdataDocuments
printDebug "toInsert hyperdataDocuments" (length tihs)
let documentsWithId = mergeData (toInserted idsNotRepeated) (toInsert hyperdataDocuments) --}
-- printDebug "documentsWithId" documentsWithId
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT --{-
-- flowCorpus :: NodeType -> [HyperdataDocument] -> ([ReturnId],MasterUserId,UserId,CorpusId) -> IO CorpusId
flowCorpus NodeCorpus hyperdataDocuments (ids,masterUserId,userId,userCorpusId) = do
--}
--------------------------------------------------
-- List Ngrams Flow
userListId <- runCmd' $ listFlowUser userId userCorpusId
printDebug "Working on User ListId : " userListId
let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
-- printDebug "documentsWithId" documentsWithId
let docsWithNgrams = documentIdWithNgrams extractNgramsT documentsWithId let docsWithNgrams = documentIdWithNgrams extractNgramsT documentsWithId
-- printDebug "docsWithNgrams" docsWithNgrams -- printDebug "docsWithNgrams" docsWithNgrams
let maps = mapNodeIdNgrams docsWithNgrams let maps = mapNodeIdNgrams docsWithNgrams
-- printDebug "maps" (maps)
-- printDebug "maps" (maps)
indexedNgrams <- runCmd' $ indexNgrams maps indexedNgrams <- runCmd' $ indexNgrams maps
-- printDebug "inserted ngrams" indexedNgrams -- printDebug "inserted ngrams" indexedNgrams
_ <- runCmd' $ insertToNodeNgrams indexedNgrams _ <- runCmd' $ insertToNodeNgrams indexedNgrams
-- List Flow
listId2 <- runCmd' $ listFlow masterUserId corpusId indexedNgrams
printDebug "list id : " listId2
--}
(userId, _, corpusId2) <- subFlowCorpus userArbitrary cName
userListId <- runCmd' $ listFlowUser userId corpusId2
printDebug "UserList : " userListId
inserted <- runCmd' $ add corpusId2 (map reId ids)
printDebug "Added : " (length inserted)
_ <- runCmd' $ mkDashboard corpusId2 userId listId2 <- runCmd' $ listFlow masterUserId userCorpusId indexedNgrams
_ <- runCmd' $ mkGraph corpusId2 userId printDebug "Working on ListId : " listId2
--}
--------------------------------------------------
_ <- runCmd' $ mkDashboard userCorpusId userId
_ <- runCmd' $ mkGraph userCorpusId userId
-- Annuaire Flow -- Annuaire Flow
-- _ <- runCmd' $ mkAnnuaire rootUserId userId -- _ <- runCmd' $ mkAnnuaire rootUserId userId
pure corpusId2 pure userCorpusId
-- runCmd' $ del [corpusId2, corpusId] -- runCmd' $ del [corpusId2, corpusId]
flowCorpus NodeAnnuaire _hyperdataDocuments (_ids,_masterUserId,_userId,_userCorpusId) = undefined
flowCorpus _ _ _ = undefined
type CorpusName = Text type CorpusName = Text
subFlowCorpus :: Username -> CorpusName -> IO (UserId, RootId, CorpusId) subFlowCorpus :: Username -> CorpusName -> IO (UserId, RootId, CorpusId)
...@@ -145,13 +156,13 @@ type NodeId = Int ...@@ -145,13 +156,13 @@ type NodeId = Int
type ListId = Int type ListId = Int
toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
toInsert = DM.fromList . map (\d -> (hash (_hyperdataDocument_uniqId d), d)) toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
where where
hash = maybe "Error" identity err = "Database.Flow.toInsert"
toInserted :: [ReturnId] -> Map HashId ReturnId toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) ) toInserted = DM.fromList . map (\r -> (reUniqId r, r) )
$ filter (\r -> reInserted r == True) rs . filter (\r -> reInserted r == True)
data DocumentWithId = data DocumentWithId =
DocumentWithId { documentId :: NodeId DocumentWithId { documentId :: NodeId
...@@ -186,6 +197,9 @@ extractNgramsT doc = DM.fromList $ [(NgramsT Sources source, 1)] ...@@ -186,6 +197,9 @@ extractNgramsT doc = DM.fromList $ [(NgramsT Sources source, 1)]
authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
-- TODO group terms -- TODO group terms
documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int) documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
-> [DocumentWithId] -> [DocumentIdWithNgrams] -> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d)) documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
......
...@@ -38,17 +38,22 @@ import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fie ...@@ -38,17 +38,22 @@ import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fie
type NodeContact = Node HyperdataContact type NodeContact = Node HyperdataContact
data HyperdataContact = data HyperdataContact =
HyperdataContact { _hc_bdd :: Maybe Text -- ID of Database source HyperdataContact { _hc_who :: Maybe ContactWho
, _hc_who :: Maybe ContactWho , _hc_where :: Maybe [ContactWhere]
, _hc_where :: Maybe [ContactWhere] , _hc_metaData :: Maybe ContactMetaData
, _hc_lastValidation :: Maybe Text
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
data ContactMetaData =
ContactMetaData { _cm_bdd :: Maybe Text
, _cm_lastValidation :: Maybe Text
, _cm_uniqIdBdd :: Maybe Text
, _cm_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
arbitraryHyperdataContact :: HyperdataContact arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing Nothing Nothing Nothing arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing
data ContactWho = data ContactWho =
ContactWho { _cw_id :: Maybe Int ContactWho { _cw_id :: Maybe Int
...@@ -61,13 +66,17 @@ data ContactWho = ...@@ -61,13 +66,17 @@ data ContactWho =
data ContactWhere = data ContactWhere =
ContactWhere { _cw_organization :: Maybe [Text] ContactWhere { _cw_organization :: Maybe [Text]
, _cw_labTeamDepts :: Maybe [Text] , _cw_labTeamDepts :: Maybe [Text]
, _cw_role :: Maybe Text , _cw_role :: Maybe Text
, _cw_office :: Maybe Text , _cw_office :: Maybe Text
, _cw_country :: Maybe Text , _cw_country :: Maybe Text
, _cw_city :: Maybe Text , _cw_city :: Maybe Text
, _cw_touch :: Maybe ContactTouch , _cw_touch :: Maybe ContactTouch
, _cw_start :: Maybe UTCTime
, _cw_end :: Maybe UTCTime , _cw_entry :: Maybe UTCTime
, _cw_exit :: Maybe UTCTime
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
data ContactTouch = data ContactTouch =
...@@ -86,21 +95,29 @@ nodeContactW maybeName maybeContact aId = ...@@ -86,21 +95,29 @@ nodeContactW maybeName maybeContact aId =
contact = maybe arbitraryHyperdataContact identity maybeContact contact = maybe arbitraryHyperdataContact identity maybeContact
-- | Main instances of Contact
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact instance Hyperdata HyperdataContact
-- | Database (Posgresql-simple instance)
instance FromField HyperdataContact where instance FromField HyperdataContact where
fromField = fromField' fromField = fromField'
-- | Database (Opaleye instance)
instance QueryRunnerColumnDefault PGJsonb HyperdataContact where instance QueryRunnerColumnDefault PGJsonb HyperdataContact where
queryRunnerColumnDefault = fieldQueryRunnerColumn queryRunnerColumnDefault = fieldQueryRunnerColumn
-- | All lenses
makeLenses ''ContactWho makeLenses ''ContactWho
makeLenses ''ContactWhere makeLenses ''ContactWhere
makeLenses ''ContactTouch makeLenses ''ContactTouch
makeLenses ''ContactMetaData
makeLenses ''HyperdataContact makeLenses ''HyperdataContact
-- | All Json instances
$(deriveJSON (unPrefix "_cw_") ''ContactWho) $(deriveJSON (unPrefix "_cw_") ''ContactWho)
$(deriveJSON (unPrefix "_cw_") ''ContactWhere) $(deriveJSON (unPrefix "_cw_") ''ContactWhere)
$(deriveJSON (unPrefix "_ct_") ''ContactTouch) $(deriveJSON (unPrefix "_ct_") ''ContactTouch)
$(deriveJSON (unPrefix "_cm_") ''ContactMetaData)
$(deriveJSON (unPrefix "_hc_") ''HyperdataContact) $(deriveJSON (unPrefix "_hc_") ''HyperdataContact)
...@@ -62,7 +62,6 @@ module Gargantext.Database.Node.Document.Insert where ...@@ -62,7 +62,6 @@ module Gargantext.Database.Node.Document.Insert where
import Control.Lens (set, view) import Control.Lens (set, view)
import Control.Lens.Prism import Control.Lens.Prism
import Control.Lens.Cons import Control.Lens.Cons
import Control.Monad (join)
import Data.Aeson (toJSON, Value) import Data.Aeson (toJSON, Value)
import Data.Maybe (maybe) import Data.Maybe (maybe)
import Data.Text (Text) import Data.Text (Text)
...@@ -232,11 +231,11 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d) ...@@ -232,11 +231,11 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
-- * Uniqueness of document definition -- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests) -- TODO factorize with above (use the function below for tests)
addUniqIdsContact :: HyperdataContact -> HyperdataContact addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set hc_uniqIdBdd (Just hashBdd) addUniqIdsContact hc = set (hc_metaData . _Just . cm_uniqIdBdd . _Just) hashBdd
$ set hc_uniqId (Just hash) hc $ set (hc_metaData . _Just . cm_uniqId . _Just) hash hc
where where
hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact
hashBdd = uniqId $ DT.concat $ map ($ hc) ([(\d -> maybe' (view hc_bdd d))] <> hashParametersContact) hashBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybe' (view (hc_metaData . _Just . cm_bdd) d)] <> hashParametersContact)
uniqId :: Text -> Text uniqId :: Text -> Text
uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment