Pairing.hs 4.49 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
{-|
Module      : Gargantext.Database.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

12
{-# LANGUAGE QuasiQuotes       #-}
13 14
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
15
{-# LANGUAGE RankNTypes        #-}
16 17 18 19 20
-- {-# LANGUAGE Arrows #-}

module Gargantext.Database.Flow.Pairing
    where

Alexandre Delanoë's avatar
Alexandre Delanoë committed
21 22
--import Debug.Trace (trace)
import Control.Lens (_Just,view)
23 24 25 26 27 28 29 30 31 32 33
import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Opaleye
-- import Opaleye.Aggregate
-- import Control.Arrow (returnA)
import Data.Maybe (catMaybes)
import Data.Map (Map, fromList)
import Safe (lastMay)
import qualified Data.Map as DM
import Data.Text (Text, toLower)
import qualified Data.Text as DT
import Gargantext.Prelude hiding (sum)
34
import Gargantext.Database.Schema.Ngrams -- (NgramsType(..))
35 36 37 38
--import Gargantext.Database.Node.Contact -- (HyperdataContact(..))
--import Gargantext.Database.Types.Node -- (Hyperdata(..))
import Gargantext.Database.Node.Contact
import Gargantext.Database.Flow.Utils
39
import Gargantext.Database.Utils (Cmd, runPGSQuery)
40
import Gargantext.Database.Types.Node (AnnuaireId, CorpusId)
41 42 43 44 45 46 47
import Gargantext.Database.Node.Children
import Gargantext.Core.Types (NodeType(..))

-- TODO mv this type in Types Main
type Terms = Text

-- | TODO : add paring policy as parameter
48
pairing :: AnnuaireId -> CorpusId -> Cmd err Int
49
pairing aId cId = do
50
  contacts' <- getContacts aId (Just NodeContact)
51
  let contactsMap = pairingPolicyToMap toLower $ toMaps extractNgramsT contacts'
52 53

  ngramsMap' <- getNgramsTindexed cId Authors
54 55 56
  let ngramsMap = pairingPolicyToMap lastName ngramsMap'

  let indexedNgrams = pairMaps contactsMap ngramsMap
57 58

  insertToNodeNgrams indexedNgrams
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
  -- TODO add List

lastName :: Terms -> Terms
lastName texte = DT.toLower $ maybe texte (\x -> if DT.length x > 3 then x else texte) (lastName' texte)
  where
    lastName' = lastMay . DT.splitOn " "

-- TODO: this methods is dangerous (maybe equalities of the result are not taken into account
-- emergency demo plan...
pairingPolicyToMap :: (Terms -> Terms)
                    -> Map (NgramsT Ngrams) a -> Map (NgramsT Ngrams) a
pairingPolicyToMap f = DM.mapKeys (pairingPolicy f)

pairingPolicy :: (Terms -> Terms) -> NgramsT Ngrams -> NgramsT Ngrams
pairingPolicy f (NgramsT nt (Ngrams ng _)) = (NgramsT nt (Ngrams (f ng) 1))

-- | TODO : use Occurrences in place of Int
extractNgramsT :: HyperdataContact -> Map (NgramsT Ngrams) Int
extractNgramsT contact = fromList [(NgramsT Authors    a' , 1)| a' <- authors    ]
  where
    authors    = map text2ngrams $ catMaybes [view (hc_who . _Just . cw_lastName) contact]
--}

82 83
-- NP: notice how this function is no longer specific to the ContactId type
pairMaps :: Map (NgramsT Ngrams) a
84
         -> Map (NgramsT Ngrams) NgramsId
85 86 87 88 89 90 91
         -> Map NgramsIndexed (Map NgramsType a)
pairMaps m1 m2 =
  DM.fromList
    [ (NgramsIndexed ng nId, DM.singleton nt n2i)
    | (k@(NgramsT nt ng),n2i) <- DM.toList m1
    , Just nId <- [DM.lookup k m2]
    ]
92 93

-----------------------------------------------------------------------
94 95
getNgramsTindexed:: CorpusId -> NgramsType -> Cmd err (Map (NgramsT Ngrams) NgramsId)
getNgramsTindexed corpusId ngramsType' = fromList
96
    <$> map (\(ngramsId',t,n) -> (NgramsT ngramsType' (Ngrams t n),ngramsId'))
97
    <$> selectNgramsTindexed corpusId ngramsType'
98

99 100
selectNgramsTindexed :: CorpusId -> NgramsType -> Cmd err [(NgramsId, Terms, Int)]
selectNgramsTindexed corpusId ngramsType'' = runPGSQuery selectQuery (corpusId, ngramsTypeId ngramsType'')
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
  where
    selectQuery = [sql| SELECT n.id,n.terms,n.n from ngrams n
                  JOIN nodes_ngrams occ ON occ.ngram_id = n.id
                  JOIN nodes_nodes  nn  ON nn.node2_id = occ.node_id
                  
                  WHERE nn.node1_id     = ?
                    AND occ.ngrams_type = ?
                    AND occ.node_id = nn.node2_id
                  GROUP BY n.id;
                 |]

{- | TODO more typed SQL queries
selectNgramsTindexed :: CorpusId -> NgramsType -> Query NgramsRead
selectNgramsTindexed corpusId ngramsType = proc () -> do
    nodeNode   <- queryNodeNodeTable     -< ()
    nodeNgrams <- queryNodesNgramsTable  -< ()
    ngrams     <- queryNgramsTable       -< ()

    restrict -< node1_id nodeNode .== pgInt4 corpusId
    restrict -< node2_id nodeNode .== node_id nodeNgrams
    restrict -< ngrams_id ngrams  .== node_ngrams nodeNgrams

    result <- aggregate groupBy (ngrams_id ngrams)
    returnA -< result
--}