Commit 3e7c2638 authored by Alexandre Delanoë

[FIX] bugs at import (ngrams must not exceed 255 chars).

parent b34b8baf
Pipeline #224 failed with stage
...@@ -24,3 +24,6 @@ _darcs ...@@ -24,3 +24,6 @@ _darcs
*.pdf *.pdf
# Runtime # Runtime
# Repo
repo.json*
{-|
Module : Gargantext.Core.Metrics.TFICF
Description : Core Metrics TFICF filtering and grouping
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Metrics.TFICF
where
import Data.Map
import Gargantext.Prelude
import Gargantext.Database.Metrics.TFICF
import Gargantext.Database.Schema.Ngrams
import Gargantext.Text.Metrics.TFICF
import Gargantext.API.Ngrams
-- | Placeholder for the TFICF grouping step announced in the module header.
--
-- Takes the TFICF data together with a map from 'NgramsType' to the
-- 'NgramsElement's of that type and is meant to return updated TFICF data.
--
-- NOTE: not implemented yet. The binding is 'undefined', so forcing it
-- (or any result obtained from it) fails at runtime.
-- TODO(review): confirm the intended grouping semantics before implementing.
group :: TficfData -> Map NgramsType [NgramsElement] -> TficfData
group = undefined
-- | Placeholder for the TFICF filtering step announced in the module header.
--
-- Takes the TFICF data and is meant to return a list of 'NgramsElement'.
--
-- NOTE: not implemented yet. The binding is 'undefined', so forcing it
-- fails at runtime.
--
-- NOTE(review): this name collides with the @filter@ brought in by the
-- unqualified @import Data.Map@ above (and presumably by the project
-- prelude as well - confirm); any unqualified use of @filter@ inside this
-- module would be ambiguous.
filter :: TficfData -> [NgramsElement]
filter = undefined
...@@ -29,6 +29,7 @@ import Data.Map (Map, lookup, fromListWith, toList) ...@@ -29,6 +29,7 @@ import Data.Map (Map, lookup, fromListWith, toList)
import Data.Maybe (Maybe(..), catMaybes) import Data.Maybe (Maybe(..), catMaybes)
import Data.Monoid import Data.Monoid
import Data.Text (Text, splitOn, intercalate) import Data.Text (Text, splitOn, intercalate)
import qualified Data.Text as Text
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
import Data.List (concat) import Data.List (concat)
import GHC.Show (Show) import GHC.Show (Show)
...@@ -38,7 +39,7 @@ import Gargantext.Core.Types.Main ...@@ -38,7 +39,7 @@ import Gargantext.Core.Types.Main
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName) import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams) import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Metrics.TFICF (getTficf) --import Gargantext.Database.Metrics.TFICF (getTficf)
import Gargantext.Text.Terms (extractTerms) import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..)) import Gargantext.Text.Metrics.TFICF (Tficf(..))
import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId) import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
...@@ -205,10 +206,10 @@ toInserted :: [ReturnId] -> Map HashId ReturnId ...@@ -205,10 +206,10 @@ toInserted :: [ReturnId] -> Map HashId ReturnId
toInserted = DM.fromList . map (\r -> (reUniqId r, r) ) toInserted = DM.fromList . map (\r -> (reUniqId r, r) )
. filter (\r -> reInserted r == True) . filter (\r -> reInserted r == True)
data DocumentWithId = data DocumentWithId = DocumentWithId
DocumentWithId { documentId :: !NodeId { documentId :: !NodeId
, documentData :: !HyperdataDocument , documentData :: !HyperdataDocument
} deriving (Show) } deriving (Show)
mergeData :: Map HashId ReturnId mergeData :: Map HashId ReturnId
-> Map HashId HyperdataDocument -> Map HashId HyperdataDocument
...@@ -220,17 +221,23 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList ...@@ -220,17 +221,23 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList
<*> Just hpd <*> Just hpd
------------------------------------------------------------------------ ------------------------------------------------------------------------
data DocumentIdWithNgrams = data DocumentIdWithNgrams = DocumentIdWithNgrams
DocumentIdWithNgrams { documentWithId :: !DocumentWithId
{ documentWithId :: !DocumentWithId , document_ngrams :: !(Map Ngrams (Map NgramsType Int))
, document_ngrams :: !(Map Ngrams (Map NgramsType Int)) } deriving (Show)
} deriving (Show)
-- TODO group terms -- TODO group terms
extractNgramsT :: HasNodeError err extractNgramsT :: HasNodeError err
=> HyperdataDocument => HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int)) -> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT doc = do extractNgramsT hd = filterNgramsT 255 <$> extractNgramsT' hd
extractNgramsT' :: HasNodeError err
=> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT' doc = do
let source = text2ngrams let source = text2ngrams
$ maybe "Nothing" identity $ maybe "Nothing" identity
$ _hyperdataDocument_source doc $ _hyperdataDocument_source doc
...@@ -257,7 +264,15 @@ extractNgramsT doc = do ...@@ -257,7 +264,15 @@ extractNgramsT doc = do
<> [(a', DM.singleton Authors 1) | a' <- authors ] <> [(a', DM.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ] <> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ]
--{-
-- | Cap the text of every ngram key at @s@ characters.
--
-- Keys whose text is already at most @s@ characters long are left untouched
-- ('Text.take' is the identity on them); longer ones are cut down to their
-- first @s@ characters while the second 'Ngrams' field is kept as is.
--
-- Truncation can make two previously distinct keys equal. In that case their
-- per-'NgramsType' counts are merged by addition rather than one entry
-- silently overwriting the other (which is what rebuilding the map with
-- @DM.fromList@ would do).
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
                     -> Map Ngrams (Map NgramsType Int)
filterNgramsT s = DM.mapKeysWith (DM.unionWith (+)) truncateNgrams
  where
    -- Shorten only the textual part of the ngram.
    truncateNgrams (Ngrams t n) = Ngrams (Text.take s t) n
--}
documentIdWithNgrams :: HasNodeError err documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument => (HyperdataDocument
...@@ -310,7 +325,7 @@ flowListUser :: FlowCmdM env err m ...@@ -310,7 +325,7 @@ flowListUser :: FlowCmdM env err m
-> Map NgramsType [NgramsElement] -> Map NgramsType [NgramsElement]
-> Int -> Int
-> m ListId -> m ListId
flowListUser uId cId ngsM n = do flowListUser uId cId ngsM _n = do
lId <- getOrMkList cId uId lId <- getOrMkList cId uId
{- {-
......
{-| {-|
Module : Gargantext.Database.Metrics.TFICF Module : Gargantext.Database.Metrics.TFICF
Description : Ngram connection to the Database Description : Building TFICF Data from Database
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
TFICF, generalization of TFIDF
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Metrics.TFICF where module Gargantext.Database.Metrics.TFICF where
...@@ -22,13 +21,13 @@ import Data.Text (Text) ...@@ -22,13 +21,13 @@ import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
import Safe (headMay) import Safe (headMay)
import Gargantext.Text.Metrics.TFICF -- (tficf) --import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Types.Individu (UsernameMaster) import Gargantext.Core.Types.Individu (UsernameMaster)
import Gargantext.Database.Utils (Cmd, runPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Database.Types.Node (ListId, CorpusId, NodeType(..)) import Gargantext.Database.Types.Node ({-ListId,-} CorpusId, NodeType(..))
import Gargantext.Database.Config (nodeTypeId) import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms, NgramsType, ngramsTypeId) import Gargantext.Database.Schema.Ngrams ({-NgramsId, NgramsTerms,-} NgramsType, ngramsTypeId)
type OccGlobal = Double type OccGlobal = Double
type OccCorpus = Double type OccCorpus = Double
...@@ -45,6 +44,7 @@ data TficfData = TficfData ...@@ -45,6 +44,7 @@ data TficfData = TficfData
, td_terms :: ![TficfTerms] , td_terms :: ![TficfTerms]
} deriving (Show) } deriving (Show)
getTficf :: UsernameMaster -> CorpusId -> NgramsType getTficf :: UsernameMaster -> CorpusId -> NgramsType
-> Cmd err TficfData -> Cmd err TficfData
getTficf u cId ngType = do getTficf u cId ngType = do
......
...@@ -89,8 +89,8 @@ ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua; ...@@ -89,8 +89,8 @@ ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
--------------------------------------------------------- ---------------------------------------------------------
CREATE TABLE public.nodes_nodes ( CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL, node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL, node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real, score real,
favorite boolean, favorite boolean,
delete boolean, delete boolean,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment