Commit 42bd5bfc authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'annuaire'

parents 5386816c 3dec90f2
......@@ -147,6 +147,7 @@ library:
- servant-swagger
- servant-swagger-ui
- servant-static-th
- serialise
- split
- stemmer
- string-conversions
......
......@@ -80,6 +80,7 @@ import Gargantext.API.Node ( Roots , roots
, HyperdataCorpus
, HyperdataAnnuaire
)
--import Gargantext.Database.Node.Contact (HyperdataContact)
import Gargantext.Database.Types.Node ()
import Gargantext.API.Count ( CountAPI, count, Query)
import Gargantext.API.Search ( SearchAPI, search, SearchQuery)
......
......@@ -69,7 +69,8 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
--data FacetFormat = Table | Chart
data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
data TabType = Docs | Terms | Sources | Authors | Institutes | Trash
| Contacts
deriving (Generic, Enum, Bounded)
instance FromHttpApiData TabType
......@@ -80,6 +81,9 @@ instance FromHttpApiData TabType
parseUrlPiece "Institutes" = pure Institutes
parseUrlPiece "Authors" = pure Authors
parseUrlPiece "Trash" = pure Trash
parseUrlPiece "Contacts" = pure Contacts
parseUrlPiece _ = Left "Unexpected value of TabType"
instance ToParamSchema TabType
......
......@@ -147,6 +147,7 @@ nodeAPI conn p id
:<|> getChart conn id
:<|> favApi conn id
:<|> delDocs conn id
-- Annuaire
-- :<|> upload
-- :<|> query
------------------------------------------------------------------------
......
......@@ -13,7 +13,7 @@ Portability : POSIX
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Database.Flow (flowDatabase, ngrams2list)
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
import GHC.Show (Show)
......@@ -28,89 +28,116 @@ import Gargantext.Core.Types (NodePoly(..), ListType(..), listTypeId)
import Gargantext.Database.Bashql (runCmd') -- , del)
import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName)
import Gargantext.Database.Ngrams (insertNgrams, Ngrams(..), NgramsT(..), NgramsIndexed(..), indexNgramsT, ngramsTypeId, NgramsType(..), text2ngrams)
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList, mkGraph, mkDashboard)--, mkAnnuaire)
import Gargantext.Database.Node (getRoot, mkRoot, mkCorpus, Cmd(..), mkList, mkGraph, mkDashboard, mkAnnuaire)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Node.Document.Add (add)
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, ToDbData(..))
import Gargantext.Database.Node.Document.Insert (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.NodeNgram (NodeNgramPoly(..), insertNodeNgrams)
import Gargantext.Database.NodeNgramsNgrams (NodeNgramsNgramsPoly(..), insertNodeNgramsNgramsNew)
import Gargantext.Database.Types.Node (HyperdataDocument(..))
-- import Gargantext.Database.Node.Contact (HyperdataContact(..))
--import Gargantext.Database.Node.Contact (HyperdataContact(..))
import Gargantext.Database.User (getUser, UserLight(..), Username)
import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
import Gargantext.Text.Parsers (parseDocs, FileFormat)
type UserId = Int
type RootId = Int
type CorpusId = Int
type UserId = Int
type MasterUserId = Int
{-
flowCorpus :: [ToDbData] -> CorpusName -> IO CorpusId
flowCorpus = undefined
--}
type RootId = Int
type CorpusId = Int
type MasterCorpusId = Int
flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO Int
flowDatabase :: FileFormat -> FilePath -> CorpusName -> IO CorpusId
flowDatabase ff fp cName = do
-- Corpus Flow
(masterUserId, _, corpusId) <- subFlowCorpus userMaster corpusMasterName
-- Documents Flow
hyperdataDocuments <- map addUniqIdsDoc <$> parseDocs ff fp
params <- flowInsert NodeCorpus hyperdataDocuments cName
flowCorpus NodeCorpus hyperdataDocuments params
flowInsert :: NodeType -> [HyperdataDocument] -> CorpusName
-> IO ([ReturnId], MasterUserId, MasterCorpusId, UserId, CorpusId)
flowInsert _nt hyperdataDocuments cName = do
let hyperdataDocuments' = map (\h -> ToDbDocument h) hyperdataDocuments
printDebug "hyperdataDocuments" (length hyperdataDocuments)
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
ids <- runCmd' $ insertDocuments masterUserId masterCorpusId hyperdataDocuments'
(userId, _, userCorpusId) <- subFlowCorpus userArbitrary cName
_ <- runCmd' $ add userCorpusId (map reId ids)
pure (ids, masterUserId, masterCorpusId, userId, userCorpusId)
flowAnnuaire :: FilePath -> IO ()
flowAnnuaire filePath = do
contacts <- deserialiseImtUsersFromFile filePath
ps <- flowInsertAnnuaire "Annuaire" $ take 10 $ map (\h-> ToDbContact h) $ map addUniqIdsContact contacts
printDebug "length annuaire" (ps)
ids <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Docs IDs : " (ids)
-- idsRepeat <- runCmd' $ insertDocuments masterUserId corpusId hyperdataDocuments'
-- printDebug "Repeated Docs IDs : " (length idsRepeat)
let idsNotRepeated = filter (\r -> reInserted r == True) ids
--{-
-- Ngrams Flow
-- todo: flow for new documents only
let tids = toInserted ids
printDebug "toInserted ids" (length tids)
let tihs = toInsert hyperdataDocuments
printDebug "toInsert hyperdataDocuments" (length tihs)
flowInsertAnnuaire :: CorpusName
-> [ToDbData]
-> IO ([ReturnId], UserId, CorpusId, UserId, CorpusId)
flowInsertAnnuaire name children = do
let documentsWithId = mergeData (toInserted idsNotRepeated) (toInsert hyperdataDocuments)
-- printDebug "documentsWithId" documentsWithId
(masterUserId, _, masterCorpusId) <- subFlowCorpus userMaster corpusMasterName
ids <- runCmd' $ insertDocuments masterUserId masterCorpusId children
(userId, _, userCorpusId) <- subFlowAnnuaire userArbitrary name
_ <- runCmd' $ add userCorpusId (map reId ids)
printDebug "AnnuaireID" userCorpusId
pure (ids, masterUserId, masterCorpusId, userId, userCorpusId)
--}
-- docsWithNgrams <- documentIdWithNgrams documentsWithId extractNgramsT
--{-
flowCorpus :: NodeType
-> [HyperdataDocument]
-> ([ReturnId], UserId, CorpusId, UserId, CorpusId)
-> IO CorpusId
flowCorpus NodeCorpus hyperdataDocuments (ids,masterUserId,masterCorpusId, userId,userCorpusId) = do
--}
--------------------------------------------------
-- List Ngrams Flow
userListId <- runCmd' $ listFlowUser userId userCorpusId
printDebug "Working on User ListId : " userListId
let documentsWithId = mergeData (toInserted ids) (toInsert hyperdataDocuments)
-- printDebug "documentsWithId" documentsWithId
let docsWithNgrams = documentIdWithNgrams extractNgramsT documentsWithId
-- printDebug "docsWithNgrams" docsWithNgrams
let maps = mapNodeIdNgrams docsWithNgrams
-- printDebug "maps" (maps)
-- printDebug "maps" (maps)
indexedNgrams <- runCmd' $ indexNgrams maps
-- printDebug "inserted ngrams" indexedNgrams
_ <- runCmd' $ insertToNodeNgrams indexedNgrams
-- List Flow
listId2 <- runCmd' $ listFlow masterUserId corpusId indexedNgrams
printDebug "list id : " listId2
--}
(userId, _, corpusId2) <- subFlowCorpus userArbitrary cName
userListId <- runCmd' $ listFlowUser userId corpusId2
printDebug "UserList : " userListId
inserted <- runCmd' $ add corpusId2 (map reId ids)
printDebug "Added : " (length inserted)
_ <- runCmd' $ mkDashboard corpusId2 userId
_ <- runCmd' $ mkGraph corpusId2 userId
listId2 <- runCmd' $ listFlow masterUserId masterCorpusId indexedNgrams
printDebug "Working on ListId : " listId2
--}
--------------------------------------------------
_ <- runCmd' $ mkDashboard userCorpusId userId
_ <- runCmd' $ mkGraph userCorpusId userId
-- Annuaire Flow
-- _ <- runCmd' $ mkAnnuaire rootUserId userId
pure corpusId2
pure userCorpusId
-- runCmd' $ del [corpusId2, corpusId]
flowCorpus NodeAnnuaire _hyperdataDocuments (_ids,_masterUserId,_masterCorpusId,_userId,_userCorpusId) = undefined
flowCorpus _ _ _ = undefined
type CorpusName = Text
subFlowCorpus :: Username -> CorpusName -> IO (UserId, RootId, CorpusId)
......@@ -138,6 +165,34 @@ subFlowCorpus username cName = do
(username, userId, rootId, corpusId)
pure (userId, rootId, corpusId)
subFlowAnnuaire :: Username -> CorpusName -> IO (UserId, RootId, CorpusId)
subFlowAnnuaire username _cName = do
maybeUserId <- runCmd' (getUser username)
let userId = case maybeUserId of
Nothing -> panic "Error: User does not exist (yet)"
-- mk NodeUser gargantua_id "Node Gargantua"
Just user -> userLight_id user
rootId' <- map _node_id <$> runCmd' (getRoot userId)
rootId'' <- case rootId' of
[] -> runCmd' (mkRoot username userId)
n -> case length n >= 2 of
True -> panic "Error: more than 1 userNode / user"
False -> pure rootId'
let rootId = maybe (panic "error rootId") identity (head rootId'')
corpusId' <- runCmd' $ mkAnnuaire rootId userId
let corpusId = maybe (panic "error corpusId") identity (head corpusId')
printDebug "(username, userId, rootId, corpusId)"
(username, userId, rootId, corpusId)
pure (userId, rootId, corpusId)
------------------------------------------------------------------------
type HashId = Text
......@@ -145,13 +200,13 @@ type NodeId = Int
type ListId = Int
toInsert :: [HyperdataDocument] -> Map HashId HyperdataDocument
toInsert = DM.fromList . map (\d -> (hash (_hyperdataDocument_uniqId d), d))
toInsert = DM.fromList . map (\d -> (maybe err identity (_hyperdataDocument_uniqId d), d))
where
hash = maybe "Error" identity
err = "Database.Flow.toInsert"
toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted rs = DM.fromList $ map (\r -> (reUniqId r, r) )
$ filter (\r -> reInserted r == True) rs
toInserted = DM.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True)
data DocumentWithId =
DocumentWithId { documentId :: NodeId
......@@ -186,6 +241,9 @@ extractNgramsT doc = DM.fromList $ [(NgramsT Sources source, 1)]
authors = map text2ngrams $ maybe ["Nothing"] (splitOn ", ") $ _hyperdataDocument_authors doc
-- TODO group terms
documentIdWithNgrams :: (HyperdataDocument -> Map (NgramsT Ngrams) Int)
-> [DocumentWithId] -> [DocumentIdWithNgrams]
documentIdWithNgrams f = map (\d -> DocumentIdWithNgrams d ((f . documentData) d))
......
......@@ -272,8 +272,7 @@ deleteNodes ns = mkCmd $ \conn ->
getNodesWith :: JSONB a => Connection -> Int -> proxy a -> Maybe NodeType
-> Maybe Offset -> Maybe Limit -> IO [Node a]
getNodesWith conn parentId _ nodeType maybeOffset maybeLimit =
runQuery conn $ selectNodesWith
parentId nodeType maybeOffset maybeLimit
runQuery conn $ selectNodesWith parentId nodeType maybeOffset maybeLimit
-- NP check type
......
......@@ -22,6 +22,7 @@ module Gargantext.Database.Node.Contact
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON)
import Data.Swagger (ToSchema)
import Data.Text (Text)
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
......@@ -32,6 +33,8 @@ import Gargantext.Database.Types.Node (Node,Hyperdata,NodeType(..))
import Gargantext.Database.Utils (fromField')
import Gargantext.Prelude
import Opaleye (QueryRunnerColumnDefault, queryRunnerColumnDefault, PGJsonb, fieldQueryRunnerColumn)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------------------
......@@ -41,17 +44,28 @@ data HyperdataContact =
HyperdataContact { _hc_bdd :: Maybe Text -- ID of Database source
, _hc_who :: Maybe ContactWho
, _hc_where :: Maybe [ContactWhere]
, _hc_title :: Maybe Text -- TODO remove (only demo)
, _hc_source :: Maybe Text -- TODO remove (only demo)
, _hc_lastValidation :: Maybe Text
, _hc_uniqIdBdd :: Maybe Text
, _hc_uniqId :: Maybe Text
} deriving (Eq, Show, Generic)
-- TOD contact metadata (Type is too flat)
data ContactMetaData =
ContactMetaData { _cm_bdd :: Maybe Text
, _cm_lastValidation :: Maybe Text
} deriving (Eq, Show, Generic)
arbitraryHyperdataContact :: HyperdataContact
arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing Nothing Nothing Nothing
arbitraryHyperdataContact = HyperdataContact Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing
data ContactWho =
ContactWho { _cw_id :: Maybe Int
ContactWho { _cw_id :: Maybe Text
, _cw_firstName :: Maybe Text
, _cw_lastName :: Maybe Text
, _cw_keywords :: Maybe [Text]
......@@ -61,13 +75,17 @@ data ContactWho =
data ContactWhere =
ContactWhere { _cw_organization :: Maybe [Text]
, _cw_labTeamDepts :: Maybe [Text]
, _cw_role :: Maybe Text
, _cw_office :: Maybe Text
, _cw_country :: Maybe Text
, _cw_city :: Maybe Text
, _cw_touch :: Maybe ContactTouch
, _cw_start :: Maybe UTCTime
, _cw_end :: Maybe UTCTime
, _cw_entry :: Maybe UTCTime
, _cw_exit :: Maybe UTCTime
} deriving (Eq, Show, Generic)
data ContactTouch =
......@@ -86,21 +104,37 @@ nodeContactW maybeName maybeContact aId =
contact = maybe arbitraryHyperdataContact identity maybeContact
-- | Main instances of Contact
instance ToSchema HyperdataContact
instance ToSchema ContactWho
instance ToSchema ContactWhere
instance ToSchema ContactTouch
instance Arbitrary HyperdataContact where
arbitrary = elements [HyperdataContact Nothing Nothing Nothing Nothing Nothing Nothing Nothing Nothing]
-- | Specific Gargantext instance
instance Hyperdata HyperdataContact
-- | Database (Posgresql-simple instance)
instance FromField HyperdataContact where
fromField = fromField'
-- | Database (Opaleye instance)
instance QueryRunnerColumnDefault PGJsonb HyperdataContact where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-- | All lenses
makeLenses ''ContactWho
makeLenses ''ContactWhere
makeLenses ''ContactTouch
makeLenses ''ContactMetaData
makeLenses ''HyperdataContact
-- | All Json instances
$(deriveJSON (unPrefix "_cw_") ''ContactWho)
$(deriveJSON (unPrefix "_cw_") ''ContactWhere)
$(deriveJSON (unPrefix "_ct_") ''ContactTouch)
$(deriveJSON (unPrefix "_cm_") ''ContactMetaData)
$(deriveJSON (unPrefix "_hc_") ''HyperdataContact)
......@@ -212,7 +212,7 @@ instance ToRow InputData where
addUniqIdsDoc :: HyperdataDocument -> HyperdataDocument
addUniqIdsDoc doc = set hyperdataDocument_uniqIdBdd (Just hashBdd)
$ set hyperdataDocument_uniqId (Just hash) doc
$ set hyperdataDocument_uniqId (Just hash) doc
where
hash = uniqId $ DT.concat $ map ($ doc) hashParametersDoc
hashBdd = uniqId $ DT.concat $ map ($ doc) ([(\d -> maybe' (_hyperdataDocument_bdd d))] <> hashParametersDoc)
......@@ -231,11 +231,11 @@ hashParametersDoc = [ \d -> maybe' (_hyperdataDocument_title d)
-- * Uniqueness of document definition
-- TODO factorize with above (use the function below for tests)
addUniqIdsContact :: HyperdataContact -> HyperdataContact
addUniqIdsContact hc = set hc_uniqIdBdd (Just hashBdd)
$ set hc_uniqId (Just hash) hc
addUniqIdsContact hc = set (hc_uniqIdBdd) (Just hashBdd)
$ set (hc_uniqId) (Just hash) hc
where
hash = uniqId $ DT.concat $ map ($ hc) hashParametersContact
hashBdd = uniqId $ DT.concat $ map ($ hc) ([(\d -> maybe' (view hc_bdd d))] <> hashParametersContact)
hashBdd = uniqId $ DT.concat $ map ($ hc) ([\d -> maybe' (view hc_bdd d)] <> hashParametersContact)
uniqId :: Text -> Text
uniqId = DT.pack . SHA.showDigest . SHA.sha256 . DC.pack . DT.unpack
......
......@@ -111,3 +111,4 @@ publisBySchool hal_data' = Gargantext.Prelude.map (\(i,n) -> (M.lookup i mapIdSc
$ DV.map (\n -> splitOn (pack ", ") (csvHal_instStructId_i n) )
$ DV.filter (\n -> csvHal_publication_year n == 2017) hal_data'
{-|
Module : Gargantext.Ext.IMTUser
Description : Interface to get IMT users
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
We can not import the IMT Client API code since it is copyrighted.
Here is writtent a common interface.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
module Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
where
import System.IO (FilePath)
import Codec.Serialise
import Data.Maybe (Maybe, catMaybes)
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Prelude
import Gargantext.Database.Node.Contact -- (HyperdataContact, ContactWho, ContactWhere, ContactTouch, ContactMetaData)
import qualified Data.ByteString.Lazy as BSL
instance Serialise IMTUser
deserialiseImtUsersFromFile :: FilePath -> IO [HyperdataContact]
deserialiseImtUsersFromFile filepath = map imtUser2gargContact <$> deserialiseFromFile' filepath
deserialiseFromFile' :: FilePath -> IO [IMTUser]
deserialiseFromFile' filepath = deserialise <$> BSL.readFile filepath
data IMTUser = IMTUser
{ id :: Text
, entite :: Maybe Text
, mail :: Maybe Text
, nom :: Maybe Text
, prenom :: Maybe Text
, fonction :: Maybe Text
, tel :: Maybe Text
, fax :: Maybe Text
, service :: Maybe Text
, groupe :: Maybe Text
, bureau :: Maybe Text
, url :: Maybe Text
, pservice :: Maybe Text
, pfonction :: Maybe Text
, afonction :: Maybe Text
, grprech :: Maybe Text
, lieu :: Maybe Text
, aprecision :: Maybe Text
, atel :: Maybe Text
, sexe :: Maybe Text
, statut :: Maybe Text
, idutilentite :: Maybe Text
, entite2 :: Maybe Text
, service2 :: Maybe Text
, groupe2 :: Maybe Text
, actif :: Maybe Text
, idutilsiecoles :: Maybe Text
, date_modification :: Maybe Text
} deriving (Eq, Show, Generic)
imtUser2gargContact :: IMTUser -> HyperdataContact
imtUser2gargContact (IMTUser id' entite' mail' nom' prenom' fonction' tel' _fax'
service' _groupe' bureau' url' _pservice' _pfonction' _afonction'
_grprech' lieu' _aprecision' _atel' _sexe' _statut' _idutilentite'
_entite2' _service2' _group2' _actif' _idutilsiecoles' date_modification')
= HyperdataContact (Just "IMT Annuaire") (Just qui) (Just [ou]) ((<>) <$> (fmap (\p -> p <> " ") prenom') <*> nom') entite' date_modification' Nothing Nothing
where
qui = ContactWho (Just id') prenom' nom' (Just $ catMaybes [service']) Nothing
ou = ContactWhere (toList entite') (toList service') fonction' bureau' (Just "France") lieu' contact Nothing Nothing
contact = Just $ ContactTouch mail' tel' url'
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList Nothing = Nothing
toList (Just x) = Just [x]
......@@ -8,8 +8,6 @@ packages:
- 'deps/patches-map'
- 'deps/patches-class'
#- 'deps/imt-api-client'
allow-newer: true
extra-deps:
- git: https://github.com/delanoe/data-time-segment.git
......@@ -30,6 +28,6 @@ extra-deps:
- servant-multipart-0.11.2
- stemmer-0.5.2
- servant-flatten-0.2
- serialise-0.2.0.0 # imt-api-client
- serialise-0.2.0.0
- KMP-0.1.0.2
- validity-0.8.0.0 # patches-{map,class}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment