Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Test] Make tests take reasonable time #6286

Merged
merged 5 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion plutus-core/plutus-core/test/Evaluation/Machines.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import PlutusCore.Evaluation.Machine.Exception
import PlutusCore.Generators.Hedgehog.Interesting
import PlutusCore.Generators.Hedgehog.Test
import PlutusCore.Pretty
import PlutusCore.Test

import Test.Tasty
import Test.Tasty.Hedgehog
Expand All @@ -29,7 +30,9 @@ testMachine
-> TestTree
testMachine machine eval =
testGroup machine $ fromInterestingTermGens $ \name ->
testPropertyNamed name (fromString name) . propEvaluate eval
testPropertyNamed name (fromString name)
. mapTestLimitAtLeast 50 (`div` 10)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an example of the changes in this PR. mapTestLimitAtLeast n (`div` m) runs no fewer than n tests (i.e. you'll get n tests if you just run the test suite locally) and (`div` m) says that the amount of tests runs in the nightly test suite need to be downscaled by m times (if would perhaps be better to upscale instead of downscaling but I just wanted to get something working and we already run the nightly test suite with 50000 hedgehog tests, so I took that as a given).

. propEvaluate eval

test_machines :: TestTree
test_machines = testGroup
Expand Down
4 changes: 3 additions & 1 deletion plutus-core/plutus-core/test/Evaluation/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import PlutusCore.Evaluation.Machine.ExBudgetingDefaults
import PlutusCore.Evaluation.Machine.ExBudgetStream (ExBudgetStream (..))
import PlutusCore.Generators.Hedgehog (GenArbitraryTerm (..), GenTypedTerm (..), forAllNoShow)
import PlutusCore.Pretty
import PlutusCore.Test
import PlutusPrelude

import Control.Exception
Expand Down Expand Up @@ -57,7 +58,8 @@ test_builtinsDon'tThrow =
testPropertyNamed
(display fun)
(fromString $ display fun)
(prop_builtinEvaluation runtimes fun gen f)
(mapTestLimitAtLeast 99 (`div` 50) $
prop_builtinEvaluation runtimes fun gen f)
where
gen bn = Gen.choice [genArgsWellTyped def bn, genArgsArbitrary def bn]
f bn args = \case
Expand Down
16 changes: 10 additions & 6 deletions plutus-core/plutus-core/test/Names/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@

module Names.Spec where

import Data.String (IsString (fromString))
import Data.Text qualified as Text
import Hedgehog (Gen, Property, assert, forAll, property, tripping)
import Hedgehog.Gen qualified as Gen
import PlutusCore (DefaultFun, DefaultUni, FreeVariableError, Kind (Type), Name (..), NamedDeBruijn,
NamedTyDeBruijn, Program, Quote, Rename (rename), Term (..), TyName (..),
Type (..), Unique (..), deBruijnTerm, runQuote, runQuoteT, unDeBruijnTerm)
Expand All @@ -23,7 +19,13 @@ import PlutusCore.Parser qualified as Parser
import PlutusCore.Pretty (display, displayPlcSimple)
import PlutusCore.Rename.Internal (renameProgramM)
import PlutusCore.Test (BindingRemoval (BindingRemovalNotOk), Prerename (PrerenameNo), brokenRename,
checkFails, noMarkRename, test_scopingGood, test_scopingSpoilRenamer)
checkFails, mapTestLimitAtLeast, noMarkRename, test_scopingGood,
test_scopingSpoilRenamer)

import Data.String (IsString (fromString))
import Data.Text qualified as Text
import Hedgehog (Gen, Property, assert, forAll, property, tripping)
import Hedgehog.Gen qualified as Gen
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.Hedgehog (testPropertyNamed)
import Test.Tasty.HUnit (assertBool, testCase, (@?=))
Expand All @@ -43,7 +45,9 @@ test_DeBruijnInteresting :: TestTree
test_DeBruijnInteresting =
testGroup "de Bruijn transformation round-trip" $
fromInterestingTermGens \name ->
testPropertyNamed name (fromString name) . prop_DeBruijn
testPropertyNamed name (fromString name)
. mapTestLimitAtLeast 99 (`div` 10)
. prop_DeBruijn

test_mangle :: TestTree
test_mangle =
Expand Down
12 changes: 7 additions & 5 deletions plutus-core/plutus-core/test/Normalization/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import PlutusCore
import PlutusCore.Generators.Hedgehog.AST
import PlutusCore.MkPlc
import PlutusCore.Normalize
import PlutusCore.Test

import Control.Monad.Morph (hoist)

Expand All @@ -30,15 +31,16 @@ test_appAppLamLam = do
integer2 @?= integer2'

test_normalizeTypesInIdempotent :: Property
test_normalizeTypesInIdempotent = property . hoist (pure . runQuote) $ do
termNormTypes <- forAllT $ runAstGen (genTerm @DefaultFun) >>= normalizeTypesIn
termNormTypes' <- normalizeTypesIn termNormTypes
termNormTypes === termNormTypes'
test_normalizeTypesInIdempotent =
mapTestLimitAtLeast 300 (`div` 10) . property . hoist (pure . runQuote) $ do
termNormTypes <- forAllT $ runAstGen (genTerm @DefaultFun) >>= normalizeTypesIn
termNormTypes' <- normalizeTypesIn termNormTypes
termNormTypes === termNormTypes'

test_typeNormalization :: TestTree
test_typeNormalization =
testGroup "typeNormalization"
[ testCase "appAppLamLam" test_appAppLamLam
[ testCase "appAppLamLam" test_appAppLamLam
, testPropertyNamed
"normalizeTypesInIdempotent"
"normalizeTypesInIdempotent"
Expand Down
5 changes: 2 additions & 3 deletions plutus-core/plutus-core/test/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,10 @@ genConstantForTest =
m = fromIntegral (maxBound :: Int) :: Integer

{- | Check that printing followed by parsing is the identity function on
constants. This is quite fast, so we do it 1000 times to get good coverage
of the various generators.
constants.
-}
propLexConstant :: Property
propLexConstant = withTests (1000 :: Hedgehog.TestLimit) . property $ do
propLexConstant = mapTestLimitAtLeast 200 (`div` 10) . property $ do
term <- forAllPretty $ Constant () <$> runAstGen genConstantForTest
Hedgehog.tripping term displayPlc (fmap void . parseTm)
where
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ import Test.QuickCheck

-- | This mainly tests that the `Data` generator isn't non-terminating or too slow.
prop_genData :: Property
prop_genData = withMaxSuccess 3000 $ \(d :: Data) -> d === deserialise (serialise d)
prop_genData = withMaxSuccess 800 $ \(d :: Data) -> d === deserialise (serialise d)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently we do not upscale any QuickCheck tests in the nightly test suite. We really should, but that's gonna be future work. In the meantime, I'm downscaling everything to run

  • ideally, under 1 second
  • but under 1.5 seconds is fine too, especially if the test is helpful
  • under 2 seconds for really important tests
  • under 3 seconds for anything that I can't get under 2 without sacrificing the utility of a test too much

on my machine. On a different machine the tests would obviously take a different amount of time, but they'll perhaps be still fast and consistent enough.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import Test.QuickCheck hiding (choose, vectorOf)
-- So we don't get great coverage, but given that it takes a few seconds to generate dozens of
-- thousands of (non-filtered) test cases, we do still get some reasonable coverage in the end.
prop_unify :: Property
prop_unify = withMaxSuccess 10000 $
prop_unify = withMaxSuccess 500 $
forAllDoc "n" arbitrary shrink $ \ (NonNegative n) ->
forAllDoc "nSub" (choose (0, n)) shrink $ \ nSub ->
-- See Note [Chaotic Good fresh name generation].
Expand Down Expand Up @@ -84,7 +84,7 @@ prop_unifyRename =
-- | Check that substitution eliminates from the type all free occurrences of variables present in
-- the domain of the substitution.
prop_substType :: Property
prop_substType = withMaxSuccess 10000 $
prop_substType = withMaxSuccess 1000 $
-- No shrinking because every nested shrink makes properties harder to shrink (because you'd need
-- to regenerate the stuff that depends on the context, meaning you don't have the same
-- counterexample as you did before) and context minimality doesn't help readability very much.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ prop_genKindCorrect = p_genKindCorrect False
-- See Note [Debugging generators that don't generate well-typed/kinded terms/types]
-- and see the utility tests below when this property fails.
p_genKindCorrect :: Bool -> Property
p_genKindCorrect debug = withMaxSuccess 100000 $
p_genKindCorrect debug = withMaxSuccess 1000 $
-- Context minimality doesn't help readability, so no shrinking here
forAllDoc "ctx" genCtx (const []) $ \ ctx ->
-- Note, no shrinking here because shrinking relies on well-kindedness.
Expand All @@ -30,7 +30,7 @@ p_genKindCorrect debug = withMaxSuccess 100000 $

-- | Check that shrinking types maintains kinds.
prop_shrinkTypeSound :: Property
prop_shrinkTypeSound = withMaxSuccess 30000 $
prop_shrinkTypeSound = withMaxSuccess 500 $
forAllDoc "ctx" genCtx (const []) $ \ ctx ->
forAllDoc "k,ty" (genKindAndTypeWithCtx ctx) (shrinkKindAndType ctx) $ \ (k, ty) ->
-- See discussion about the same trick in 'prop_shrinkTermSound'.
Expand All @@ -44,7 +44,7 @@ prop_shrinkTypeSound = withMaxSuccess 30000 $

-- | Test that shrinking a type results in a type of a smaller kind. Useful for debugging shrinking.
prop_shrinkTypeSmallerKind :: Property
prop_shrinkTypeSmallerKind = withMaxSuccess 30000 $
prop_shrinkTypeSmallerKind = withMaxSuccess 3000 $
forAllDoc "k,ty" genKindAndType (shrinkKindAndType Map.empty) $ \ (k, ty) ->
assertNoCounterexamples
[ (k', ty')
Expand All @@ -60,7 +60,7 @@ prop_shrinkKindSmaller = withMaxSuccess 30000 $

-- | Test that fixKind actually gives you something of the right kind.
prop_fixKind :: Property
prop_fixKind = withMaxSuccess 30000 $
prop_fixKind = withMaxSuccess 10000 $
forAllDoc "ctx" genCtx (const []) $ \ ctx ->
forAllDoc "k,ty" genKindAndType (shrinkKindAndType ctx) $ \ (k, ty) ->
-- Note, fixKind only works on smaller kinds, so we use shrink to get a definitely smaller kind
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ prop_genTypeCorrect = p_genTypeCorrect False
-- See Note [Debugging generators that don't generate well-typed/kinded terms/types]
-- and the utility properties below when this property fails.
p_genTypeCorrect :: Bool -> Property
p_genTypeCorrect debug = withMaxSuccess 10000 $ do
p_genTypeCorrect debug = withMaxSuccess 200 $ do
-- Note, we don't shrink this term here because a precondition of shrinking is that
-- the term we are shrinking is well-typed. If it is not, the counterexample we get
-- from shrinking will be nonsene.
Expand All @@ -66,7 +66,7 @@ p_genTypeCorrect debug = withMaxSuccess 10000 $ do
-- | Test that when we generate a fully applied term we end up
-- with a well-typed term.
prop_genWellTypedFullyApplied :: Property
prop_genWellTypedFullyApplied = withMaxSuccess 1000 $
prop_genWellTypedFullyApplied = withMaxSuccess 50 $
forAllDoc "ty, tm" genTypeAndTerm_ shrinkClosedTypedTerm $ \ (ty, tm) ->
-- No shrinking here because if `genFullyApplied` is wrong then the shrinking
-- will be wrong too. See `prop_genTypeCorrect`.
Expand Down Expand Up @@ -99,7 +99,7 @@ prop_shrinkTermSound = withMaxSuccess 10 $

-- | Test that `findInstantiation` results in a well-typed instantiation.
prop_findInstantiation :: Property
prop_findInstantiation = withMaxSuccess 10000 $
prop_findInstantiation = withMaxSuccess 1000 $
forAllDoc "ctx" genCtx (const []) $ \ ctx0 ->
forAllDoc "ty" (genTypeWithCtx ctx0 $ Type ()) (shrinkType ctx0) $ \ ty0 ->
forAllDoc "target" (genTypeWithCtx ctx0 $ Type ()) (shrinkType ctx0) $ \ target ->
Expand Down Expand Up @@ -160,7 +160,7 @@ prop_stats_numShrink = withMaxSuccess 10 $

-- | Specific test that `inhabitType` returns well-typed things
prop_inhabited :: Property
prop_inhabited = withMaxSuccess 3000 $
prop_inhabited = withMaxSuccess 50 $
-- No shrinking here because if the generator
-- generates nonsense shrinking will be nonsense.
forAllDoc "ty,tm" (genInhab mempty) (const []) $ \ (ty, tm) -> typeCheckTerm tm ty
Expand Down
21 changes: 17 additions & 4 deletions plutus-core/plutus-ir/test/PlutusIR/Parser/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import PlutusCore (runQuoteT)
import PlutusCore.Annotation
import PlutusCore.Default qualified as PLC
import PlutusCore.Error (ParserErrorBundle)
import PlutusCore.Test (mapTestLimitAtLeast)
import PlutusIR
import PlutusIR.Generators.AST
import PlutusIR.Parser
Expand Down Expand Up @@ -130,8 +131,20 @@ propIgnores splice = property $ do

test_parsing :: TestTree
test_parsing = testGroup "parsing"
[ testPropertyNamed "parser round-trip" "propRoundTrip" propRoundTrip
, testPropertyNamed "parser ignores whitespace" "propIgnores whitespace" (propIgnores whitespace)
, testPropertyNamed "parser ignores comments" "propIgnores comments" (propIgnores comment)
, testPropertyNamed "parser captures ending positions correctly" "propTermSrcSpan" propTermSrcSpan
[ testPropertyNamed
"parser round-trip"
"propRoundTrip"
(mapTestLimitAtLeast 99 (`div` 2) $ propRoundTrip)
, testPropertyNamed
"parser ignores whitespace"
"propIgnores whitespace"
(mapTestLimitAtLeast 50 (`div` 8) $ propIgnores whitespace)
, testPropertyNamed
"parser ignores comments"
"propIgnores comments"
(mapTestLimitAtLeast 30 (`div` 30) $ propIgnores comment)
, testPropertyNamed
"parser captures ending positions correctly"
"propTermSrcSpan"
(mapTestLimitAtLeast 99 (`div` 2) $ propTermSrcSpan)
]
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ test_evaluateBuiltins =

prop_evaluateBuiltins :: Bool -> BuiltinSemanticsVariant DefaultFun -> Property
prop_evaluateBuiltins conservative biVariant =
withMaxSuccess (2 * 3 * numTestsForPassProp) $
withMaxSuccess numTestsForPassProp $
testPassProp
runIdentity
$ \tc -> evaluateBuiltinsPass tc conservative (def {_biSemanticsVariant = biVariant}) def
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ test_inline =
prop_inline ::
BuiltinSemanticsVariant DefaultFun -> Property
prop_inline biVariant =
withMaxSuccess (3 * numTestsForPassProp) $
withMaxSuccess numTestsForPassProp $
testPassProp
runQuote
$ \tc -> inlinePassSC True tc mempty (def {_biSemanticsVariant = biVariant})
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ test_letFloatInRelaxed =
prop_floatIn ::
BuiltinSemanticsVariant PLC.DefaultFun -> Bool -> Property
prop_floatIn biVariant conservative =
withMaxSuccess (3 * 2 * numTestsForPassProp) $ testPassProp runQuote testPass
withMaxSuccess numTestsForPassProp $ testPassProp runQuote testPass
where
testPass tcconfig =
LetFloatIn.floatTermPassSC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ test_letFloatOut =
<> RecSplit.recSplitPass tcconfig
<> LetMerge.letMergePass tcconfig

prop_floatIn :: BuiltinSemanticsVariant PLC.DefaultFun -> Property
prop_floatIn biVariant = withMaxSuccess (3 * numTestsForPassProp) $ testPassProp runQuote testPass
prop_floatOut :: BuiltinSemanticsVariant PLC.DefaultFun -> Property
prop_floatOut biVariant = withMaxSuccess numTestsForPassProp $ testPassProp runQuote testPass
where
testPass tcconfig =
LetFloatOut.floatTermPassSC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ test_nonStrict =
]

prop_nonStrict :: Bool -> Property
prop_nonStrict useUnit = withMaxSuccess (2 * numTestsForPassProp) $
prop_nonStrict useUnit = withMaxSuccess numTestsForPassProp $
testPassProp runQuote $ \tc -> NonStrict.compileNonStrictBindingsPassSC tc useUnit
8 changes: 4 additions & 4 deletions plutus-core/testlib/PlutusCore/Generators/NEAT/Spec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,19 @@ tests :: TestTree
tests =
testGroup "NEAT"
-- the `adjustOption (min ...)` allows to make these big tests easier at runtime
[ adjustOption (min $ GenDepth 13) $
[ adjustOption (min $ GenDepth 10) $
bigTest "normalization commutes with conversion from generated types"
(Type ())
(packAssertion prop_normalizeConvertCommuteTypes)
, adjustOption (min $ GenDepth 14) $
, adjustOption (min $ GenDepth 12) $
bigTest "normal types cannot reduce"
(Type ())
(packAssertion prop_normalTypesCannotReduce)
, adjustOption (min $ GenDepth 18) $
, adjustOption (min $ GenDepth 15) $
bigTest "type preservation - CK"
(TyBuiltinG TyUnitG)
(packAssertion prop_typePreservation)
, adjustOption (min $ GenDepth 18) $
, adjustOption (min $ GenDepth 15) $
bigTest "typed CK vs untyped CEK produce the same output"
(TyBuiltinG TyUnitG)
(packAssertion prop_agree_termEval)
Expand Down
15 changes: 12 additions & 3 deletions plutus-core/testlib/PlutusCore/Test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
{-# OPTIONS_GHC -Wno-orphans #-}

module PlutusCore.Test (
mapTestLimit,
withAtLeastTests,
mapTestLimitAtLeast,
checkFails,
ToTPlc (..),
ToUPlc (..),
Expand Down Expand Up @@ -104,12 +107,18 @@ mapTestLimit f =
EarlyTermination c tests -> EarlyTermination c $ f tests
}

{- | Set the number of times a property should be executed before it is considered
successful, unless it's already higher than that.
{- | Set the number of times a property should be executed before it is considered successful,
unless it's already higher than that.
-}
withAtLeastTests :: TestLimit -> Property -> Property
withAtLeastTests = mapTestLimit . max

{- | Set the number of times a property should be executed before it is considered successful,
unless the given function scales it higher than that.
-}
mapTestLimitAtLeast :: TestLimit -> (TestLimit -> TestLimit) -> Property -> Property
mapTestLimitAtLeast n f = withAtLeastTests n . mapTestLimit f

{- | @check@ is supposed to just check if the property fails or not, but for some stupid reason it
also performs shrinking and prints the counterexample and other junk. This function is like
@check@, but doesn't do any of that.
Expand Down Expand Up @@ -568,7 +577,7 @@ prop_scopingFor ::
-- | The runner of the pass.
(t NameAnn -> TPLC.Quote (t NameAnn)) ->
Property
prop_scopingFor gen bindRem preren run = withTests 1000 . property $ do
prop_scopingFor gen bindRem preren run = withTests 200 . property $ do
prog <- forAllNoShow $ runAstGen gen
let catchEverything = unsafePerformIO . try @SomeException . evaluate
prep = runPrerename preren
Expand Down
2 changes: 1 addition & 1 deletion plutus-core/testlib/PlutusIR/Pass/Test.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ instance Arbitrary (BuiltinSemanticsVariant PLC.DefaultFun) where
-- exploration of the program space. If you also take other arguments, then consider multiplying
-- this up in order to account for the larger space.
numTestsForPassProp :: Int
numTestsForPassProp = 3000
numTestsForPassProp = 99
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's ban ever running 100 tests. We don't need a default to decide for us how many tests to run, so let's ensure that we always make the choice ourselves. Seeing 100 in the test output should mean "oops, we forgot to explicitly specify the number of tests".


-- | Run a 'Pass' on a 'Term', setting up the typechecking config and throwing errors.
runTestPass
Expand Down
Loading