Commit 3e7c2638 authored by Alexandre Delanoë

[FIX] bugs at import (ngrams must not exceed 255 chars).

parent b34b8baf
......@@ -24,3 +24,6 @@ _darcs
*.pdf
# Runtime
# Repo
repo.json*
{-|
Module : Gargantext.Core.Metrics.TFICF
Description : Core Metrics TFICF filtering and grouping
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Metrics.TFICF
where
import Data.Map
import Gargantext.Prelude
import Gargantext.Database.Metrics.TFICF
import Gargantext.Database.Schema.Ngrams
import Gargantext.Text.Metrics.TFICF
import Gargantext.API.Ngrams
-- | Placeholder for grouping TFICF data according to a map from ngrams type
-- to ngrams elements. Not implemented yet: the definition is 'undefined',
-- so evaluating it throws at runtime.
group :: TficfData -> Map NgramsType [NgramsElement] -> TficfData
group = undefined
-- | Placeholder for turning TFICF data into a filtered list of ngrams
-- elements. Not implemented yet: the definition is 'undefined', so
-- evaluating it throws at runtime.
filter :: TficfData -> [NgramsElement]
filter = undefined
......@@ -29,6 +29,7 @@ import Data.Map (Map, lookup, fromListWith, toList)
import Data.Maybe (Maybe(..), catMaybes)
import Data.Monoid
import Data.Text (Text, splitOn, intercalate)
import qualified Data.Text as Text
import Data.Tuple.Extra (both)
import Data.List (concat)
import GHC.Show (Show)
......@@ -38,7 +39,7 @@ import Gargantext.Core.Types.Main
import Gargantext.Core (Lang(..))
import Gargantext.Database.Config (userMaster, userArbitrary, corpusMasterName)
import Gargantext.Database.Flow.Utils (insertToNodeNgrams)
import Gargantext.Database.Metrics.TFICF (getTficf)
--import Gargantext.Database.Metrics.TFICF (getTficf)
import Gargantext.Text.Terms (extractTerms)
import Gargantext.Text.Metrics.TFICF (Tficf(..))
import Gargantext.Database.Metrics.Count (getNgramsElementsWithParentNodeId)
......@@ -205,10 +206,10 @@ toInserted :: [ReturnId] -> Map HashId ReturnId
-- Keep only the rows flagged as inserted and index them by their unique id.
toInserted returned =
  DM.fromList [ (reUniqId row, row) | row <- returned, reInserted row ]
data DocumentWithId =
DocumentWithId { documentId :: !NodeId
, documentData :: !HyperdataDocument
} deriving (Show)
-- | A document paired with the identifier of its node.
-- Both fields are strict.
data DocumentWithId = DocumentWithId
{ documentId :: !NodeId -- ^ identifier of the node holding the document
, documentData :: !HyperdataDocument -- ^ the document's hyperdata
} deriving (Show)
mergeData :: Map HashId ReturnId
-> Map HashId HyperdataDocument
......@@ -220,17 +221,23 @@ mergeData rs = catMaybes . map toDocumentWithId . DM.toList
<*> Just hpd
------------------------------------------------------------------------
data DocumentIdWithNgrams =
DocumentIdWithNgrams
{ documentWithId :: !DocumentWithId
, document_ngrams :: !(Map Ngrams (Map NgramsType Int))
} deriving (Show)
-- | A document (with its node id) together with the ngrams attached to it.
-- Both fields are strict.
data DocumentIdWithNgrams = DocumentIdWithNgrams
{ documentWithId :: !DocumentWithId -- ^ the document and its node id
, document_ngrams :: !(Map Ngrams (Map NgramsType Int)) -- ^ for each ngram, an 'Int' per ngrams type
} deriving (Show)
-- TODO group terms
-- | Extract the ngrams of a document with @extractNgramsT'@ and pass the
-- result through @filterNgramsT 255@, so that no ngram term in the returned
-- map is longer than 255 characters.
extractNgramsT :: HasNodeError err
=> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT doc = do
extractNgramsT hd = filterNgramsT 255 <$> extractNgramsT' hd
extractNgramsT' :: HasNodeError err
=> HyperdataDocument
-> Cmd err (Map Ngrams (Map NgramsType Int))
extractNgramsT' doc = do
let source = text2ngrams
$ maybe "Nothing" identity
$ _hyperdataDocument_source doc
......@@ -257,7 +264,15 @@ extractNgramsT doc = do
<> [(a', DM.singleton Authors 1) | a' <- authors ]
<> [(t', DM.singleton NgramsTerms 1) | t' <- terms' ]
--{-
-- | Cap the length of every ngram term to at most @s@ characters.
--
-- Terms that already fit are kept untouched; longer terms are cut to their
-- first @s@ characters (the second field of 'Ngrams' is kept as it was).
--
-- Several distinct over-long terms can share the same @s@-character prefix.
-- Once truncated they become the same key, so their per-type counts are
-- merged by addition instead of letting one entry silently overwrite the
-- other (which is what a plain 'DM.fromList' would do).
filterNgramsT :: Int -> Map Ngrams (Map NgramsType Int)
                     -> Map Ngrams (Map NgramsType Int)
filterNgramsT s =
  DM.fromListWith (DM.unionWith (+)) . map truncateTerm . DM.toList
  where
    -- Shorten a single entry's term when it exceeds the limit.
    truncateTerm (ng@(Ngrams t n), counts) =
      if Text.length t < s
        then (ng, counts)
        else (Ngrams (Text.take s t) n, counts)
--}
documentIdWithNgrams :: HasNodeError err
=> (HyperdataDocument
......@@ -310,7 +325,7 @@ flowListUser :: FlowCmdM env err m
-> Map NgramsType [NgramsElement]
-> Int
-> m ListId
flowListUser uId cId ngsM n = do
flowListUser uId cId ngsM _n = do
lId <- getOrMkList cId uId
{-
......
{-|
Module : Gargantext.Database.Metrics.TFICF
Description : Ngram connection to the Database
Description : Building TFICF Data from Database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
TFICF, generalization of TFIDF
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
module Gargantext.Database.Metrics.TFICF where
......@@ -22,13 +21,13 @@ import Data.Text (Text)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import qualified Database.PostgreSQL.Simple as DPS
import Safe (headMay)
import Gargantext.Text.Metrics.TFICF -- (tficf)
--import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Prelude
import Gargantext.Core.Types.Individu (UsernameMaster)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Database.Types.Node (ListId, CorpusId, NodeType(..))
import Gargantext.Database.Types.Node ({-ListId,-} CorpusId, NodeType(..))
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms, NgramsType, ngramsTypeId)
import Gargantext.Database.Schema.Ngrams ({-NgramsId, NgramsTerms,-} NgramsType, ngramsTypeId)
-- | Occurrence count represented as a 'Double'.
-- NOTE(review): presumably the count over the whole (master) data set — confirm.
type OccGlobal = Double
-- | Occurrence count represented as a 'Double'.
-- NOTE(review): presumably the count within one corpus — confirm.
type OccCorpus = Double
......@@ -45,6 +44,7 @@ data TficfData = TficfData
, td_terms :: ![TficfTerms]
} deriving (Show)
getTficf :: UsernameMaster -> CorpusId -> NgramsType
-> Cmd err TficfData
getTficf u cId ngType = do
......
......@@ -89,8 +89,8 @@ ALTER TABLE public.nodes_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------
CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL,
node2_id integer NOT NULL,
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real,
favorite boolean,
delete boolean,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment