Commit 8de56ef9 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/adinapoli/issue-331' into testing

parents a8b3d08c 37979721
......@@ -629,6 +629,7 @@ library
, tomland >= 1.3.3.2
, tuple ^>= 0.3.0.2
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, uri-encode ^>= 1.5.0.7
, utf8-string ^>= 1.0.2
, uuid ^>= 1.3.15
......@@ -960,6 +961,7 @@ test-suite garg-test-tasty
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, unicode-collation >= 0.1.3.6
, validity ^>= 0.11.0.1
, vector ^>= 0.12.3.0
, wai
......@@ -1048,6 +1050,7 @@ test-suite garg-test-hspec
, text ^>= 1.2.4.1
, time ^>= 1.9.3
, tmp-postgres >= 1.34.1 && < 1.35
, tree-diff
, unordered-containers ^>= 0.2.16.0
, validity ^>= 0.11.0.1
, wai
......
......@@ -121,6 +121,7 @@ import Gargantext.Prelude.Clock (hasTime, getTime)
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Utils.Jobs.Monad (markFailedNoErr)
import Servant hiding (Patch)
import Text.Collate qualified as Unicode
{-
-- TODO sequences of modifications (Patchs)
......@@ -557,13 +558,21 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
&& _nsq_searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just TermAsc) = List.sortBy ngramTermsAscSorter
sortOnOrder (Just TermDesc) = List.sortBy ngramTermsDescSorter
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
......@@ -596,6 +605,13 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
. Set.toList
$ xs
-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
-- characters. This is enough for mimicking the \"natural sort\" effect
-- that users would expect.
unicodeDUCETSorter :: Text -> Text -> Ordering
unicodeDUCETSorter = Unicode.collate Unicode.rootCollator
getTableNgrams :: forall env err m.
( HasNodeStory env err m
......
......@@ -8,12 +8,14 @@ Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS -fno-warn-orphans #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeFamilies #-}
{-# OPTIONS -fno-warn-orphans #-}
module Gargantext.API.Ngrams.Types where
......@@ -33,6 +35,7 @@ import Data.Set qualified as Set
import Data.String (IsString(..))
import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) )
import Data.Text qualified as T
import Data.TreeDiff
import Data.Validity ( Validity(..) )
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
......@@ -92,7 +95,9 @@ instance ToJSONKey TabType where
toJSONKey = genericToJSONKey defaultJSONKeyOptions
newtype MSet a = MSet (Map a ())
deriving (Eq, Ord, Show, Generic, Arbitrary, Semigroup, Monoid)
deriving stock (Eq, Ord, Show, Generic)
deriving newtype (Arbitrary, Semigroup, Monoid)
deriving anyclass (ToExpr)
instance ToJSON a => ToJSON (MSet a) where
toJSON (MSet m) = toJSON (Map.keys m)
......@@ -124,7 +129,9 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData, FromField, ToField)
deriving (Ord, Eq, Show, Generic)
deriving newtype (ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData, FromField, ToField)
deriving anyclass (ToExpr)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
......@@ -175,7 +182,8 @@ data NgramsElement =
, _ne_parent :: Maybe NgramsTerm
, _ne_children :: MSet NgramsTerm
}
deriving (Ord, Eq, Show, Generic)
deriving stock (Ord, Eq, Show, Generic)
deriving anyclass (ToExpr)
deriveJSON (unPrefix "_ne_") ''NgramsElement
makeLenses ''NgramsElement
......@@ -197,7 +205,9 @@ instance ToSchema NgramsElement where
------------------------------------------------------------------------
newtype NgramsTable = NgramsTable [NgramsElement]
deriving (Ord, Eq, Generic, ToJSON, FromJSON, Show)
deriving stock (Ord, Eq, Generic, Show)
deriving newtype (ToJSON, FromJSON)
deriving anyclass (ToExpr)
-- type NgramsList = NgramsTable
......@@ -385,8 +395,8 @@ isRem = (== remPatch)
type PatchMap = PM.PatchMap
newtype PatchMSet a = PatchMSet (PatchMap a AddRem)
deriving (Eq, Show, Generic, Validity, Semigroup, Monoid, Group,
Transformable, Composable)
deriving stock (Eq, Show, Generic)
deriving newtype (Validity, Semigroup, Monoid, Group, Transformable, Composable)
unPatchMSet :: PatchMSet a -> PatchMap a AddRem
unPatchMSet (PatchMSet a) = a
......@@ -538,7 +548,8 @@ instance Action (Replace ListType) NgramsRepoElement where
act replaceP = over nre_list (act replaceP)
newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
deriving (Eq, Show, Generic, ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
deriving stock (Eq, Show, Generic)
deriving newtype (ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
mkNgramsTablePatch :: Map NgramsTerm NgramsPatch -> NgramsTablePatch
mkNgramsTablePatch = NgramsTablePatch . PM.fromMap
......@@ -699,7 +710,8 @@ data VersionedWithCount a = VersionedWithCount
, _vc_count :: Count
, _vc_data :: a
}
deriving (Generic, Show, Eq)
deriving stock (Generic, Show, Eq)
deriving anyclass ToExpr
deriveJSON (unPrefix "_vc_") ''VersionedWithCount
makeLenses ''VersionedWithCount
instance (Typeable a, ToSchema a) => ToSchema (VersionedWithCount a) where
......
......@@ -12,6 +12,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DeriveAnyClass #-}
-----------------------------------------------------------------------
module Gargantext.Core.Types.Main where
......@@ -22,6 +23,7 @@ import Data.Aeson.TH (deriveJSON)
import Data.Bimap (Bimap)
import Data.Swagger
import Data.Text (unpack, pack)
import Data.TreeDiff
import Gargantext.Core
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node -- (NodeType(..), Node, Hyperdata(..))
......@@ -49,7 +51,7 @@ type TypeId = Int
-- TODO multiple ListType declaration, remove it
-- data ListType = CandidateTerm | StopTerm | MapTerm
data ListType = CandidateTerm | StopTerm | MapTerm
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded)
deriving (Generic, Eq, Ord, Show, Read, Enum, Bounded, ToExpr)
instance ToJSON ListType
instance FromJSON ListType
......
......@@ -11,11 +11,12 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
-- {-# LANGUAGE DuplicateRecordFields #-}
......@@ -32,6 +33,7 @@ import Data.Morpheus.Types
import Data.Swagger
import Data.Text (unpack, pack)
import Data.Time (UTCTime)
import Data.TreeDiff
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import Database.PostgreSQL.Simple.ToRow (ToRow, toRow)
......@@ -246,7 +248,9 @@ pgContextId = pgResourceId _ContextId
-- to a tree, and each node has its unique identifier. Note how nodes might
-- have also /other/ identifiers, to better qualify them.
newtype NodeId = UnsafeMkNodeId { _NodeId :: Int }
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
deriving stock (Read, Generic, Eq, Ord)
deriving newtype (Num, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
deriving anyclass (ToExpr)
instance ResourceId NodeId where
isPositive = (> 0) . _NodeId
......@@ -275,6 +279,7 @@ instance ToSchema NodeId
newtype ContextId = UnsafeMkContextId { _ContextId :: Int }
deriving stock (Show, Eq, Ord, Generic)
deriving newtype (Csv.ToField, ToJSONKey, FromJSONKey, ToJSON, FromJSON, ToField, ToSchema)
deriving anyclass ToExpr
deriving FromField via NodeId
instance ToParamSchema ContextId
......@@ -288,7 +293,9 @@ instance ToHttpApiData ContextId where
toUrlPiece (UnsafeMkContextId n) = toUrlPiece n
newtype NodeContextId = UnsafeMkNodeContextId { _NodeContextId :: Int }
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
deriving stock (Read, Generic, Eq, Ord)
deriving newtype (Num, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
deriving anyclass (ToExpr)
--instance Csv.ToField NodeId where
......
......@@ -9,7 +9,6 @@ import Data.Conduit
import Data.String
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Prelude
import System.Environment
......
......@@ -7,8 +7,10 @@ import Data.Coerce
import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Patch.Class qualified as Patch
import Data.String
import Data.Text qualified as T
import Data.Validity qualified as Validity
import Text.Collate qualified as Unicode
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
......@@ -17,16 +19,20 @@ import Gargantext.Prelude
import Test.Ngrams.Query.PaginationCorpus
import Test.Tasty
import Test.Tasty.HUnit
import Test.Utils ((@??=))
tests :: TestTree
tests = testGroup "Ngrams" [unitTests]
curryElem :: NgramsElement
curryElem = mkNgramsElement "curry" MapTerm Nothing mempty
curryElem = mkMapTerm "curry"
elbaElem :: NgramsElement
elbaElem = mkNgramsElement "elba" MapTerm Nothing mempty
elbaElem = mkMapTerm "elba"
mkMapTerm :: T.Text -> NgramsElement
mkMapTerm e = mkNgramsElement (fromString . T.unpack $ e) MapTerm Nothing mempty
mockFlatCorpus :: Versioned (Map NgramsTerm NgramsElement)
mockFlatCorpus = Versioned 0 $ Map.fromList [
......@@ -43,6 +49,9 @@ unitTests = testGroup "Query tests"
[ -- Sorting
testCase "Simple query mockFlatCorpus" testFlat01
, testCase "Simple query (desc sorting)" testFlat02
, testCase "[#331] sorting via DUCET works" testSortDiacriticsDucet
, testCase "[#331] Natural sort ascending works" testNaturalSortAsceding
, testCase "[#331] Natural sort descending works" testNaturalSortDescending
-- -- Filtering
, testCase "Simple query (listType = MapTerm)" testFlat03
, testCase "Simple query (listType = StopTerm)" testFlat04
......@@ -96,6 +105,61 @@ testFlat02 = do
, _nsq_searchQuery = mockQueryFn Nothing
}
testSortDiacriticsDucet :: Assertion
testSortDiacriticsDucet = do
let inputData = [ "étude", "âge", "vue", "période" ]
let expected = [ "âge", "étude", "période", "vue" ]
expected @??= sortBy (Unicode.collate Unicode.rootCollator) inputData
testNaturalSortAsceding :: Assertion
testNaturalSortAsceding = do
let res = searchTableNgrams frenchCorpus searchQuery
res @??= VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "âge", "étude", "période", "vue" ])
where
frenchCorpus :: Versioned (Map NgramsTerm NgramsElement)
frenchCorpus = Versioned 0 $ Map.fromList [
( "doc_01", mkMapTerm "période")
, ( "doc_02", mkMapTerm "vue")
, ( "doc_03", mkMapTerm "âge")
, ( "doc_04", mkMapTerm "étude")
]
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermAsc
, _nsq_searchQuery = mockQueryFn Nothing
}
testNaturalSortDescending :: Assertion
testNaturalSortDescending = do
let res = searchTableNgrams frenchCorpus searchQuery
res @??= VersionedWithCount 0 4 ( NgramsTable $ map mkMapTerm [ "vue", "période", "étude", "âge" ])
where
frenchCorpus :: Versioned (Map NgramsTerm NgramsElement)
frenchCorpus = Versioned 0 $ Map.fromList [
( "doc_01", mkMapTerm "période")
, ( "doc_02", mkMapTerm "vue")
, ( "doc_03", mkMapTerm "âge")
, ( "doc_04", mkMapTerm "étude")
]
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
testFlat03 :: Assertion
testFlat03 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
......
......@@ -20,6 +20,7 @@ import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Text.Lazy qualified as TL
import Data.Text.Lazy.Encoding qualified as TLE
import Data.TreeDiff
import Fmt (Builder)
import Gargantext.API.Admin.Auth.Types (AuthRequest(..), Token, authRes_token)
import Gargantext.Core.Types.Individu (Username, GargPassword)
......@@ -37,7 +38,7 @@ import Test.Hspec.Expectations
import Test.Hspec.Wai (MatchBody(..), WaiExpectation, WaiSession, request)
import Test.Hspec.Wai.JSON (FromValue(..))
import Test.Hspec.Wai.Matcher (MatchHeader(..), ResponseMatcher(..), bodyEquals, formatHeader, match)
import Test.Tasty.HUnit (Assertion)
import Test.Tasty.HUnit (Assertion, assertBool)
import Test.Types
......@@ -223,3 +224,7 @@ pollUntilFinished tkn port mkUrlPiece = go 60
| otherwise
-> pure h
-- | Like HUnit's '@?=', but With a nicer error message in case the two entities are not equal.
(@??=) :: (HasCallStack, ToExpr a, Eq a) => a -> a -> Assertion
actual @??= expected =
assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment