Commit 6219c04b authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Untested, tentative support for passing Lang in a few places (fixes #250)

parent e209df2e
Pipeline #4382 passed with stage
in 28 seconds
......@@ -10,6 +10,7 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Ngrams.List
......@@ -43,11 +44,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
-- import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node (getNode, getNodeWith)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Schema.Node (_node_parent_id, node_hyperdata)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
......@@ -64,6 +65,7 @@ import qualified Gargantext.Database.Query.Table.Ngrams as TableNgrams
import qualified Gargantext.Utils.Servant as GUS
import qualified Prelude
import qualified Protolude as P
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
------------------------------------------------------------------------
type GETAPI = Summary "Get List"
:> "lists"
......@@ -154,7 +156,8 @@ reIndexWith :: ( HasNodeStory env err m
-> m ()
reIndexWith cId lId nt lts = do
-- printDebug "(cId,lId,nt,lts)" (cId, lId, nt, lts)
-- corpus_node <- getNode cId -- (Proxy :: Proxy HyperdataCorpus)
corpus_node <- getNodeWith cId (Proxy @ HyperdataCorpus)
let corpusLang = view (node_hyperdata . to _hc_lang) corpus_node
-- Getting [NgramsTerm]
ts <- List.concat
......@@ -169,7 +172,7 @@ reIndexWith cId lId nt lts = do
-- fromListWith (<>)
ngramsByDoc = map (HashMap.fromListWith (Map.unionWith (Map.unionWith (\(_a,b) (_a',b') -> (1,b+b')))))
$ map (map (\((k, cnt), v) -> (SimpleNgrams (text2ngrams k), over (traverse . traverse) (\p -> (p, cnt)) v)))
$ map (docNgrams Nothing {-here lang-} nt ts) docs
$ map (docNgrams corpusLang nt ts) docs
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
......@@ -177,7 +180,7 @@ reIndexWith cId lId nt lts = do
docNgrams :: Maybe Lang
docNgrams :: Lang
-> NgramsType
-> [NgramsTerm]
-> Gargantext.Database.Admin.Types.Node.Context HyperdataDocument
......
......@@ -20,7 +20,7 @@ import Data.Ord
import Data.Text (Text, concat, unwords)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Prelude
import Gargantext.Core (Lang(ZH))
import Gargantext.Core (Lang(ZH), defaultLanguage)
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.Mono (monoTextsBySentence)
import Gargantext.Core.Types (TermsCount)
......@@ -67,9 +67,9 @@ replaceTerms rplaceTerms pats terms = go 0
merge (len1, lab1) (len2, lab2) =
if len2 < len1 then (len1, lab1) else (len2, lab2)
buildPatternsWith :: Maybe Lang -> [NgramsTerm] -> Patterns
buildPatternsWith (Just ZH) ts = buildPatterns $ map (\k -> (Text.chunksOf 1 $ unNgramsTerm k, [])) ts
buildPatternsWith _ ts = buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts
buildPatternsWith :: Lang -> [NgramsTerm] -> Patterns
buildPatternsWith ZH ts = buildPatterns $ map (\k -> (Text.chunksOf 1 $ unNgramsTerm k, [])) ts
buildPatternsWith _ ts = buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) ts
buildPatterns :: TermList -> Patterns
buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
......@@ -86,8 +86,8 @@ buildPatterns = sortWith (Down . _pat_length) . concatMap buildPattern
--------------------------------------------------------------------------
-- Utils
type MatchedText = Text
termsInText :: Maybe Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText (Just ZH) pats txt = termsInText Nothing pats (addSpaces txt)
termsInText :: Lang -> Patterns -> Text -> [(MatchedText, TermsCount)]
termsInText ZH pats txt = termsInText defaultLanguage pats (addSpaces txt)
termsInText _ pats txt = groupWithCounts
$ List.concat
$ map (map unwords)
......
......@@ -9,9 +9,12 @@ Portability : POSIX
-}
{-# LANGUAGE TypeApplications #-}
module Gargantext.Core.Viz.Phylo.API.Tools
where
import Control.Lens hiding (Context)
import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
import Data.Map.Strict (Map)
import Data.Proxy
......@@ -24,6 +27,7 @@ import Gargantext.API.Ngrams.Prelude (getTermList)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
import Gargantext.Core (Lang)
import Gargantext.Core.Types (Context)
-- import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main (ListType(MapTerm))
......@@ -34,7 +38,10 @@ import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} se
-- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
-- import Gargantext.Database.Admin.Config (userMaster)
-- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
-- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
-- import Gargantext.Database.Admin.Config (userMaster)
-- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..), HyperdataCorpus(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
......@@ -101,18 +108,19 @@ corpusIdtoDocuments timeUnit corpusId = do
docs <- selectDocNodes corpusId
lId <- defaultList corpusId
termList <- getTermList lId MapTerm NgramsTerms
corpus_node <- getNodeWith corpusId (Proxy @ HyperdataCorpus)
let corpusLang = view (node_hyperdata . to _hc_lang) corpus_node
let patterns = case termList of
Nothing -> panic "[G.C.V.Phylo.API] no termList found"
Just termList' -> buildPatterns termList'
pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs)
pure $ map (toPhyloDocs corpusLang patterns timeUnit) (map _context_hyperdata docs)
-- TODO: Add lang to enable Chinese phylomemy
termsInText' :: Patterns -> Text -> [Text]
termsInText' p t = (map fst) $ termsInText Nothing p t
termsInText' :: Lang -> Patterns -> Text -> [Text]
termsInText' lang p t = (map fst) $ termsInText lang p t
toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document
toPhyloDocs patterns time d =
toPhyloDocs :: Lang -> Patterns -> TimeUnit -> HyperdataDocument -> Document
toPhyloDocs lang patterns time d =
let title = fromMaybe "" (_hd_title d)
abstr = fromMaybe "" (_hd_abstract d)
in Document (toPhyloDate
......@@ -123,7 +131,7 @@ toPhyloDocs patterns time d =
(fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
(fromMaybe 1 $ _hd_publication_month d)
(fromMaybe 1 $ _hd_publication_day d) time)
(termsInText' patterns $ title <> " " <> abstr) Nothing [] time
(termsInText' lang patterns $ title <> " " <> abstr) Nothing [] time
......
......@@ -10,6 +10,7 @@ Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
......@@ -17,8 +18,10 @@ module Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
-- import Data.GraphViz
-- import qualified Data.ByteString as DB
import Control.Lens hiding (Level)
import qualified Data.List as List
import Data.Maybe
import Data.Proxy
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
......@@ -27,12 +30,13 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Database.Admin.Types.Node
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.Node(defaultList, getNodeWith)
import Gargantext.Prelude
import Gargantext.Database.Action.Flow.Types
import Gargantext.Core.Viz.LegacyPhylo hiding (Svg, Dot)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Node
import Gargantext.Database.Query.Table.NodeContext (selectDocs)
import Gargantext.Core.Types
import Gargantext.Core (HasDBid)
......@@ -52,6 +56,8 @@ flowPhylo :: (FlowCmdM env err m, HasDBid NodeType)
-> m Phylo
flowPhylo cId = do
corpus_node <- getNodeWith cId (Proxy @ HyperdataCorpus)
let lang = view (node_hyperdata . to _hc_lang) corpus_node
list <- defaultList cId
termList <- HashMap.toList <$> getTermsWith (Text.words . unNgramsTerm) [list] NgramsTerms (Set.singleton MapTerm)
......@@ -65,7 +71,7 @@ flowPhylo cId = do
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y, fst <$> termsInText Nothing patterns' d)
filterTerms patterns' (y,d) = (y, fst <$> termsInText lang patterns' d)
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment