Commit e50ae8f8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Annuaire] flow to insert in DB.

parent 28e68956
......@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Database.Flow (flowDatabase, ngrams2list)
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
import GHC.Show (Show)
......@@ -29,88 +29,99 @@ import Gargantext.Database.Bashql (runCmd') -- , del)
import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName)
import Gargantext.Database.Ngrams (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId, NgramsType(..), text2ngrams)
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList, mkGraph, mkDashboard)--, mkAnnuaire)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, ToDbData(..))
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
import Gargantext.Database.Types.Node (HyperdataDocument(..))
-- import Gargantext.Database.Node.Contact (HyperdataContact(..))
import Gargantext.Database.Node.Contact (HyperdataContact(..))
import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Prelude
import Gargantext.Text.Parsers (parseDocs, FileFormat)
type UserId = Int
type RootId = Int
type UserId = Int
type MasterUserId = Int
type RootId = Int
type CorpusId = Int
{-
flowCorpus :: [ToDbData] -> CorpusName -> IO CorpusId
flowCorpus = undefined
--}
flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO Int
flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO CorpusId
flowDatabase ff fp cName = do
-- Corpus Flow
(masterUserId, _, corpusId) <- subFlowCorpus userMaster corpusMasterName
-- Documents Flow
hyperdataDocuments <- map addUniqIdsDoc <$> parseDocs ff fp
let hyperdataDocuments' = map (\h -> ToDbDocument h) hyperdataDocuments
printDebug "hyperdataDocuments" (length hyperdataDocuments)
params <- flowInsert NodeCorpus hyperdataDocuments cName
flowCorpus NodeCorpus hyperdataDocuments params
flowInsert :: NodeType -> [HyperdataDocument] -> CorpusName
-> IO ([ReturnId], MasterUserId, UserId, CorpusId)
flowInsert nt hyperdataDocuments cName = do
let hyperdataDocuments' = case nt of
NodeCorpus -> map (\h -> ToDbDocument h) hyperdataDocuments
-- NodeAnnuaire -> map (\h -> ToDbContact h) hyperdataDocuments
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
ids <- runCmd' $ insertDocuments masterUserId masterCorpusId hyperdataDocuments'
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary cName
_ <- runCmd' $ add userCorpusId (map reId ids)
pure (ids, masterUserId, userId, userCorpusId)
ids <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let idsNotRepeated = filter (\r -> reInserted r == True) ids
--{-
-- Ngrams Flow
-- todo: flow for new documents only
let tids = toInserted ids
printDebug "toInserted ids" (length tids)
flowInsertAnnuaire children name = do
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
ids <- runCmd' $ insertDocuments masterUserId masterCorpusId children
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary name
_ <- runCmd' $ add userCorpusId (map reId ids)
pure (ids, masterUserId, userId, userCorpusId)
let tihs = toInsert hyperdataDocuments
printDebug "toInsert hyperdataDocuments" (length tihs)
let documentsWithId = mergeData (toInserted idsNotRepeated) (toInsert hyperdataDocuments)
-- printDebug "documentsWithId" documentsWithId
--}
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT
--{-
-- flowCorpus :: NodeType -> [HyperdataDocument] -> ([ReturnId],MasterUserId,UserId,CorpusId) -> IO CorpusId
flowCorpus NodeCorpus hyperdataDocuments (ids,masterUserId,userId,userCorpusId) = do
--}
--------------------------------------------------
-- List Ngrams Flow
userListId <- runCmd' $ listFlowUser userId userCorpusId
printDebug "Working on User ListId : " userListId
let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
-- printDebug "documentsWithId" documentsWithId
let docsWithNgrams = documentIdWithNgrams extractNgramsT documentsWithId
-- printDebug "docsWithNgrams" docsWithNgrams
let maps = mapNodeIdNgrams docsWithNgrams
-- printDebug "maps" (maps)
-- printDebug "maps" (maps)
indexedNgrams <- runCmd' $ indexNgrams maps
-- printDebug "inserted ngrams" indexedNgrams
_ <- runCmd' $ insertToNodeNgrams indexedNgrams
-- List Flow
listId2 <- runCmd' $ listFlow masterUserId corpusId indexedNgrams
printDebug "list id : " listId2
--}
(userId, _, corpusId2) <- subFlowCorpus userArbitrary cName
userListId <- runCmd' $ listFlowUser userId corpusId2
printDebug "UserList : " userListId
inserted <- runCmd' $ add corpusId2 (map reId ids)
printDebug "Added : " (length inserted)
_ <- runCmd' $ mkDashboard corpusId2 userId
_ <- runCmd' $ mkGraph corpusId2 userId
listId2 <- runCmd' $ listFlow masterUserId userCorpusId indexedNgrams
printDebug "Working on ListId : " listId2
--}
--------------------------------------------------
_ <- runCmd' $ mkDashboard userCorpusId userId
_ <- runCmd' $ mkGraph userCorpusId userId
-- Annuaire Flow
-- _ <- runCmd' $ mkAnnuaire rootUserId userId
pure corpusId2
pure userCorpusId
-- runCmd' $ del [corpusId2, corpusId]
flowCorpus NodeAnnuaire _hyperdataDocuments (_ids,_masterUserId,_userId,_userCorpusId) = undefined
flowCorpus _ _ _ = undefined
type CorpusName = Text
subFlowCorpus :: Username -> CorpusName -> IO (UserId, RootId, CorpusId)
......@@ -145,13 +156,13 @@ type NodeId = Int
type ListId = Int
toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
toInsert = DM.fromList . map (\d -> (hash (_hyperdataDocument_uniqId d), d))
toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
where
hash = maybe "Error" identity
err = "Database.Flow.toInsert"
toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) )
$ filter (\r -> reInserted r == True) rs
toInserted = DM.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True)
data DocumentWithId =
DocumentWithId { documentId :: NodeId
......@@ -186,6 +197,9 @@ extractNgramsT doc = DM.fromList $ [(NgramsT Sources source, 1)]
authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
-- TODO group terms
documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
-> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
......
......@@ -38,17 +38,22 @@ import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fie
type NodeContact = Node HyperdataContact
data HyperdataContact =
HyperdataContact { _hc_bdd :: Maybe Text -- ID of Database source
, _hc_who :: Maybe ContactWho
, _hc_where :: Maybe [ContactWhere]
, _hc_lastValidation :: Maybe Text
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
HyperdataContact { _hc_who :: Maybe ContactWho
, _hc_where :: Maybe [ContactWhere]
, _hc_metaData :: Maybe ContactMetaData
} deriving (Eq, Show, Generic)
data ContactMetaData =
ContactMetaData { _cm_bdd :: Maybe Text
, _cm_lastValidation :: Maybe Text
, _cm_uniqIdBdd :: Maybe Text
, _cm_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing Nothing Nothing Nothing
arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing
data ContactWho =
ContactWho { _cw_id :: Maybe Int
......@@ -61,13 +66,17 @@ data ContactWho =
data ContactWhere =
ContactWhere { _cw_organization :: Maybe [Text]
, _cw_labTeamDepts :: Maybe [Text]
, _cw_role :: Maybe Text
, _cw_office :: Maybe Text
, _cw_country :: Maybe Text
, _cw_city :: Maybe Text
, _cw_touch :: Maybe ContactTouch
, _cw_start :: Maybe UTCTime
, _cw_end :: Maybe UTCTime
, _cw_entry :: Maybe UTCTime
, _cw_exit :: Maybe UTCTime
} deriving (Eq, Show, Generic)
data ContactTouch =
......@@ -86,21 +95,29 @@ nodeContactW maybeName maybeContact aId =
contact = maybe arbitraryHyperdataContact identity maybeContact
-- | Main instances of Contact
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact
-- | Database (Posgresql-simple instance)
instance FromField HyperdataContact where
fromField = fromField'
-- | Database (Opaleye instance)
instance QueryRunnerColumnDefault PGJsonb HyperdataContact where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-- | All lenses
makeLenses ''ContactWho
makeLenses ''ContactWhere
makeLenses ''ContactTouch
makeLenses ''ContactMetaData
makeLenses ''HyperdataContact
-- | All Json instances
$(deriveJSON (unPrefix "_cw_") ''ContactWho)
$(deriveJSON (unPrefix "_cw_") ''ContactWhere)
$(deriveJSON (unPrefix "_ct_") ''ContactTouch)
$(deriveJSON (unPrefix "_cm_") ''ContactMetaData)
$(deriveJSON (unPrefix "_hc_") ''HyperdataContact)
......@@ -62,7 +62,6 @@ module Gargantext.Database.Node.Document.Insert where
import Control.Lens (set, view)
import Control.Lens.Prism
import Control.Lens.Cons
import Control.Monad (join)
import Data.Aeson (toJSON, Value)
import Data.Maybe (maybe)
import Data.Text (Text)
......@@ -232,11 +231,11 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set hc_uniqIdBdd (Just hashBdd)
$ set hc_uniqId (Just hash) hc
addUniqIdsContact hc = set (hc_metaData . _Just . cm_uniqIdBdd . _Just) hashBdd
$ set (hc_metaData . _Just . cm_uniqId . _Just) hash hc
where
hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact
hashBdd = uniqId $ DT.concat $ map ($ hc) ([(\d -> maybe' (view hc_bdd d))] <> hashParametersContact)
hashBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybe' (view (hc_metaData . _Just . cm_bdd) d)] <> hashParametersContact)
uniqId :: Text -> Text
uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment