Commit 6219c04b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Untested, tentative support for passing Lang in a few places (fixes #250)

parent e209df2e
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List module Gargantext.API.Ngrams.List
...@@ -43,11 +44,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM) ...@@ -43,11 +44,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
-- import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized) -- import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode) import Gargantext.Database.Query.Table.Node (getNode, getNodeWith)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (_node_parent_id) import Gargantext.Database.Schema.Node (_node_parent_id, node_hyperdata)
import Gargantext.Database.Types (Indexed(..)) import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
...@@ -64,6 +65,7 @@ import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams ...@@ -64,6 +65,7 @@ import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import qualified Gargantext.Utils.Servant as GUS import qualified Gargantext.Utils.Servant as GUS
import qualified Prelude import qualified Prelude
import qualified Protolude as P import qualified Protolude as P
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
------------------------------------------------------------------------ ------------------------------------------------------------------------
type GETAPI = Summary "Get List" type GETAPI = Summary "Get List"
:> "lists" :> "lists"
...@@ -154,7 +156,8 @@ reIndexWith :: ( HasNodeStory env err m ...@@ -154,7 +156,8 @@ reIndexWith :: ( HasNodeStory env err m
-> m () -> m ()
reIndexWith cId lId nt lts = do reIndexWith cId lId nt lts = do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts) -- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
-- corpus_node <- getNode cId -- (Proxy :: Proxy HyperdataCorpus) corpus_node <- getNodeWith cId (Proxy @ HyperdataCorpus)
let corpusLang = view (node_hyperdata . to _hc_lang) corpus_node
-- Getting [NgramsTerm] -- Getting [NgramsTerm]
ts <- List.concat ts <- List.concat
...@@ -169,7 +172,7 @@ reIndexWith cId lId nt lts = do ...@@ -169,7 +172,7 @@ reIndexWith cId lId nt lts = do
-- fromListWith (<>) -- fromListWith (<>)
ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b'))))) ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v))) $ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (docNgrams Nothing {-here lang-} nt ts) docs $ map (docNgrams corpusLang nt ts) docs
-- Saving the indexation in database -- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
...@@ -177,7 +180,7 @@ reIndexWith cId lId nt lts = do ...@@ -177,7 +180,7 @@ reIndexWith cId lId nt lts = do
docNgrams :: Maybe Lang docNgrams :: Lang
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument -> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
......
...@@ -20,7 +20,7 @@ import Data.Ord ...@@ -20,7 +20,7 @@ import Data.Ord
import Data.Text (Text, concat, unwords) import Data.Text (Text, concat, unwords)
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(ZH)) import Gargantext.Core (Lang(ZH), defaultLanguage)
import Gargantext.Core.Text.Context import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence) import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Gargantext.Core.Types (TermsCount) import Gargantext.Core.Types (TermsCount)
...@@ -67,9 +67,9 @@ replaceTerms rplaceTerms pats terms = go 0 ...@@ -67,9 +67,9 @@ replaceTerms rplaceTerms pats terms = go 0
merge (len1, lab1) (len2, lab2) = merge (len1, lab1) (len2, lab2) =
if len2 < len1 then (len1, lab1) else (len2, lab2) if len2 < len1 then (len1, lab1) else (len2, lab2)
buildPatternsWith :: Maybe Lang -> [NgramsTerm] -> Patterns buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns
buildPatternsWith (Just ZH) ts = buildPatterns $ map (\k -> (Text.chunksOf 1 $ unNgramsTerm k, [])) ts buildPatternsWith ZH ts = buildPatterns $ map (\k -> (Text.chunksOf 1 $ unNgramsTerm k, [])) ts
buildPatternsWith _ ts = buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts buildPatternsWith _ ts = buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts
buildPatterns :: TermList -> Patterns buildPatterns :: TermList -> Patterns
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
...@@ -86,8 +86,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern ...@@ -86,8 +86,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
-------------------------------------------------------------------------- --------------------------------------------------------------------------
-- Utils -- Utils
type MatchedText = Text type MatchedText = Text
termsInText :: Maybe Lang -> Patterns -> Text -> [(MatchedText, TermsCount)] termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText (Just ZH) pats txt = termsInText Nothing pats (addSpaces txt) termsInText ZH pats txt = termsInText defaultLanguage pats (addSpaces txt)
termsInText _ pats txt = groupWithCounts termsInText _ pats txt = groupWithCounts
$ List.concat $ List.concat
$ map (map unwords) $ map (map unwords)
......
...@@ -9,9 +9,12 @@ Portability : POSIX ...@@ -9,9 +9,12 @@ Portability : POSIX
-} -}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo.API.Tools module Gargantext.Core.Viz.Phylo.API.Tools
where where
import Control.Lens hiding (Context)
import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode) import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Data.Proxy import Data.Proxy
...@@ -24,6 +27,7 @@ import Gargantext.API.Ngrams.Prelude (getTermList) ...@@ -24,6 +27,7 @@ import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer) import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core (Lang)
import Gargantext.Core.Types (Context) import Gargantext.Core.Types (Context)
-- import Gargantext.Core.Types.Individu (User(..)) -- import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main (ListType(MapTerm)) import Gargantext.Core.Types.Main (ListType(MapTerm))
...@@ -34,7 +38,10 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se ...@@ -34,7 +38,10 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se
-- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus) -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
-- import Gargantext.Database.Admin.Config (userMaster) -- import Gargantext.Database.Admin.Config (userMaster)
-- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus) -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..)) -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
-- import Gargantext.Database.Admin.Config (userMaster)
-- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..), HyperdataCorpus(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId) import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith) import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
...@@ -101,18 +108,19 @@ corpusIdtoDocuments timeUnit corpusId = do ...@@ -101,18 +108,19 @@ corpusIdtoDocuments timeUnit corpusId = do
docs <- selectDocNodes corpusId docs <- selectDocNodes corpusId
lId <- defaultList corpusId lId <- defaultList corpusId
termList <- getTermList lId MapTerm NgramsTerms termList <- getTermList lId MapTerm NgramsTerms
corpus_node <- getNodeWith corpusId (Proxy @ HyperdataCorpus)
let corpusLang = view (node_hyperdata . to _hc_lang) corpus_node
let patterns = case termList of let patterns = case termList of
Nothing -> panic "[G.C.V.Phylo.API] no termList found" Nothing -> panic "[G.C.V.Phylo.API] no termList found"
Just termList' -> buildPatterns termList' Just termList' -> buildPatterns termList'
pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs) pure $ map (toPhyloDocs corpusLang patterns timeUnit) (map _context_hyperdata docs)
-- TODO: Add lang to enable Chinese phylomemy termsInText' :: Lang -> Patterns -> Text -> [Text]
termsInText' :: Patterns -> Text -> [Text] termsInText' lang p t = (map fst) $ termsInText lang p t
termsInText' p t = (map fst) $ termsInText Nothing p t
toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document toPhyloDocs :: Lang -> Patterns -> TimeUnit -> HyperdataDocument -> Document
toPhyloDocs patterns time d = toPhyloDocs lang patterns time d =
let title = fromMaybe "" (_hd_title d) let title = fromMaybe "" (_hd_title d)
abstr = fromMaybe "" (_hd_abstract d) abstr = fromMaybe "" (_hd_abstract d)
in Document (toPhyloDate in Document (toPhyloDate
...@@ -123,7 +131,7 @@ toPhyloDocs patterns time d = ...@@ -123,7 +131,7 @@ toPhyloDocs patterns time d =
(fromIntegral $ fromMaybe 1 $ _hd_publication_year d) (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
(fromMaybe 1 $ _hd_publication_month d) (fromMaybe 1 $ _hd_publication_month d)
(fromMaybe 1 $ _hd_publication_day d) time) (fromMaybe 1 $ _hd_publication_day d) time)
(termsInText' patterns $ title <> " " <> abstr) Nothing [] time (termsInText' lang patterns $ title <> " " <> abstr) Nothing [] time
......
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE MonoLocalBinds #-} {-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
...@@ -17,8 +18,10 @@ module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain ...@@ -17,8 +18,10 @@ module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
-- import Data.GraphViz -- import Data.GraphViz
-- import qualified Data.ByteString as DB -- import qualified Data.ByteString as DB
import Control.Lens hiding (Level)
import qualified Data.List as List import qualified Data.List as List
import Data.Maybe import Data.Maybe
import Data.Proxy
import Data.Text (Text) import Data.Text (Text)
import Debug.Trace (trace) import Debug.Trace (trace)
import GHC.IO (FilePath) import GHC.IO (FilePath)
...@@ -27,12 +30,13 @@ import Gargantext.API.Ngrams.Types ...@@ -27,12 +30,13 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList
import Gargantext.Database.Query.Table.Node(defaultList) import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot) import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Query.Table.NodeContext (selectDocs) import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core (HasDBid) import Gargantext.Core (HasDBid)
...@@ -52,6 +56,8 @@ flowPhylo :: (FlowCmdM env err m, HasDBid NodeType) ...@@ -52,6 +56,8 @@ flowPhylo :: (FlowCmdM env err m, HasDBid NodeType)
-> m Phylo -> m Phylo
flowPhylo cId = do flowPhylo cId = do
corpus_node <- getNodeWith cId (Proxy @ HyperdataCorpus)
let lang = view (node_hyperdata . to _hc_lang) corpus_node
list <- defaultList cId list <- defaultList cId
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list] NgramsTerms (Set.singleton MapTerm) termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list] NgramsTerms (Set.singleton MapTerm)
...@@ -65,7 +71,7 @@ flowPhylo cId = do ...@@ -65,7 +71,7 @@ flowPhylo cId = do
patterns = buildPatterns termList patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList -- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text]) filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y, fst <$> termsInText Nothing patterns' d) filterTerms patterns' (y,d) = (y, fst <$> termsInText lang patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs' docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment