[Errors] polish evaluation errors (#6043)

We have this distinction of "user" vs "internal" errors that made some sense when TPLC was what was evaluated, but now that it's UPLC, all of those are "user" errors. So I was going to remove the distinction entirely just to realize that it in fact still makes sense, it's just now "operational" vs "structural". This PR clarifies this. I've also tweaked errors a bit while I was here, plus removed a bunch of unsafe functions from the CEK machine API (they were convenient when they were exposed from a single module, but now that we have several of them, it's no longer worth the trouble, plus we really should use some different approach to calling the CEK machine in the benchmarks, but that's not within the scope of this PR).
IntersectMBO · May 23, 2024 · 6c6918c · 6c6918c
1 parent 252b634
commit 6c6918c
Show file tree

Hide file tree

Showing 54 changed files with 216 additions and 295 deletions.
diff --git a/doc/notes/plutus-core/cek-budgeting-profiling/CostingDetails.md b/doc/notes/plutus-core/cek-budgeting-profiling/CostingDetails.md
@@ -70,7 +70,7 @@ instance (Eq fun, Hashable fun, ToExMemory term) =>
             Restricting resb ->
                 when (exceedsBudget resb newBudget) $
                     throwingWithCause _EvaluationError
-                        (UserEvaluationError $ CekOutOfExError resb newBudget)
+                        (OperationalEvaluationError $ CekOutOfExError resb newBudget)
                         Nothing  -- No value available for error
 ```
 
@@ -96,7 +96,7 @@ to the current mode:
                 newBudget <- exBudgetStateBudget <%= (<> budget)
                 when (exceedsBudget resb newBudget) $
                     throwingWithCause _EvaluationError
-                        (UserEvaluationError $ CekOutOfExError resb newBudget)
+                        (OperationalEvaluationError $ CekOutOfExError resb newBudget)
                         Nothing
 ```
 
@@ -114,7 +114,7 @@ of memory very quickly.  Changing the code to
             Restricting resb -> 
                 when (exceedsBudget resb newBudget) $
                     throwingWithCause _EvaluationError
-                        (UserEvaluationError $ CekOutOfExError resb newBudget)
+                        (OperationalEvaluationError $ CekOutOfExError resb newBudget)
                         Nothing
 ```
 

diff --git a/plutus-benchmark/nofib/exe/Main.hs b/plutus-benchmark/nofib/exe/Main.hs
@@ -202,7 +202,7 @@ options = hsubparser
 ---------------- Evaluation ----------------
 
 evaluateWithCek :: UPLC.Term UPLC.NamedDeBruijn DefaultUni DefaultFun () -> UPLC.EvaluationResult (UPLC.Term UPLC.NamedDeBruijn DefaultUni DefaultFun ())
-evaluateWithCek = UPLC.unsafeExtractEvaluationResult . (\(fstT,_,_) -> fstT) . UPLC.runCekDeBruijn PLC.defaultCekParameters UPLC.restrictingEnormous UPLC.noEmitter
+evaluateWithCek = UPLC.unsafeToEvaluationResult . (\(fstT,_,_) -> fstT) . UPLC.runCekDeBruijn PLC.defaultCekParameters UPLC.restrictingEnormous UPLC.noEmitter
 
 writeFlatNamed :: UPLC.Program UPLC.NamedDeBruijn DefaultUni DefaultFun () -> IO ()
 writeFlatNamed prog = BS.putStr . Flat.flat . UPLC.UnrestrictedProgram $ prog

diff --git a/plutus-core/changelog.d/20240517_191403_effectfully_polish_evaluation_errors.md b/plutus-core/changelog.d/20240517_191403_effectfully_polish_evaluation_errors.md
@@ -0,0 +1,7 @@
+### Removed
+
+- `unsafeRunCekNoEmit` and all `unsafeEvaluate*` functions in #6043. To replace e.g. `unsafeEvaluateCek` you can use `evaluateCek` in combination with `unsafeToEvaluationResult`.
+
+### Changed
+
+- Renamed `unsafeExtractEvaluationResult` to `unsafeToEvaluationResult`.
diff --git a/plutus-core/cost-model/budgeting-bench/Common.hs b/plutus-core/cost-model/budgeting-bench/Common.hs
@@ -78,13 +78,11 @@ benchWith
     -> String
     -> PlainTerm DefaultUni fun
     -> Benchmark
-benchWith params name term = bench name $ whnf (unsafeEvaluateCekNoEmit params) term
-{- ^ Note that to get sensible results with whnf, we must use an evaluation
-   function that looks at the result, so eg unsafeEvaluateCek won't work
-   properly because it returns a pair whose components won't be evaluated by
-   whnf.  We can't use nf because it does too much work: for instance if it gets
-   back a 'Data' value it'll traverse all of it.
--}
+-- Note that to get sensible results with 'whnf', we must use an evaluation function that looks at
+-- the result, so e.g. 'evaluateCek' won't work properly because it returns a pair whose components
+-- won't be evaluated by 'whnf'. We can't use 'nf' because it does too much work: for instance if it
+-- gets back a 'Data' value it'll traverse all of it.
+benchWith params name term = bench name $ whnf (evaluateCekNoEmit params) term
 
 benchDefault :: String -> PlainTerm DefaultUni DefaultFun -> Benchmark
 benchDefault = benchWith defaultCekParameters

diff --git a/plutus-core/plutus-core/src/PlutusCore/Evaluation/ErrorWithCause.hs b/plutus-core/plutus-core/src/PlutusCore/Evaluation/ErrorWithCause.hs
@@ -37,11 +37,14 @@ instance (Pretty err, Pretty cause) => Pretty (ErrorWithCause err cause) where
 
 instance (PrettyBy config cause, PrettyBy config err) =>
             PrettyBy config (ErrorWithCause err cause) where
-    prettyBy config (ErrorWithCause err mayCause) =
-        "An error has occurred: " <+> prettyBy config err <>
-            case mayCause of
-                Nothing    -> mempty
-                Just cause -> hardline <> "Caused by:" <+> prettyBy config cause
+    prettyBy config (ErrorWithCause err mayCause) = fold
+        [ "An error has occurred:"
+        , hardline
+        , prettyBy config err
+        , case mayCause of
+            Nothing    -> mempty
+            Just cause -> hardline <> "Caused by:" <+> prettyBy config cause
+        ]
 
 instance (PrettyPlc cause, PrettyPlc err) =>
             Show (ErrorWithCause err cause) where

diff --git a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/Ck.hs b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/Ck.hs
@@ -17,12 +17,11 @@ module PlutusCore.Evaluation.Machine.Ck
     , CkEvaluationException
     , CkM
     , CkValue
-    , extractEvaluationResult
     , runCk
+    , extractEvaluationResult
+    , unsafeToEvaluationResult
     , evaluateCk
     , evaluateCkNoEmit
-    , unsafeEvaluateCk
-    , unsafeEvaluateCkNoEmit
     , readKnownCk
     ) where
 
@@ -195,7 +194,7 @@ stack |> Constr _ ty i es               = case es of
     t : ts -> FrameConstr ty i ts [] : stack |> t
 stack |> Case _ _ arg cs         = FrameCase cs : stack |> arg
 _     |> Error{}                 =
-    throwingWithCause _EvaluationError (UserEvaluationError CkEvaluationFailure) Nothing
+    throwingWithCause _EvaluationError (OperationalEvaluationError CkEvaluationFailure) Nothing
 _     |> var@Var{}               =
     throwingWithCause _MachineError OpenTermEvaluatedMachineError $ Just var
 
@@ -312,22 +311,6 @@ evaluateCkNoEmit
     -> Either (CkEvaluationException uni fun) (Term TyName Name uni fun ())
 evaluateCkNoEmit runtime = fst . runCk runtime False
 
--- | Evaluate a term using the CK machine with logging enabled. May throw a 'CkEvaluationException'.
-unsafeEvaluateCk
-    :: ThrowableBuiltins uni fun
-    => BuiltinsRuntime fun (CkValue uni fun)
-    -> Term TyName Name uni fun ()
-    -> (EvaluationResult (Term TyName Name uni fun ()), [Text])
-unsafeEvaluateCk runtime = first unsafeExtractEvaluationResult . evaluateCk runtime
-
--- | Evaluate a term using the CK machine with logging disabled. May throw a 'CkEvaluationException'.
-unsafeEvaluateCkNoEmit
-    :: ThrowableBuiltins uni fun
-    => BuiltinsRuntime fun (CkValue uni fun)
-    -> Term TyName Name uni fun ()
-    -> EvaluationResult (Term TyName Name uni fun ())
-unsafeEvaluateCkNoEmit runtime = unsafeExtractEvaluationResult . evaluateCkNoEmit runtime
-
 -- | Unlift a value using the CK machine.
 readKnownCk
     :: ReadKnown (Term TyName Name uni fun ()) a

diff --git a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/Exception.hs b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/Exception.hs
@@ -29,7 +29,7 @@ module PlutusCore.Evaluation.Machine.Exception
     , throwing_
     , throwingWithCause
     , extractEvaluationResult
-    , unsafeExtractEvaluationResult
+    , unsafeToEvaluationResult
     ) where
 
 import PlutusPrelude
@@ -55,23 +55,41 @@ data MachineError fun
     | OpenTermEvaluatedMachineError
       -- ^ An attempt to evaluate an open term.
     | UnliftingMachineError UnliftingError
-      -- ^ An attempt to compute a constant application resulted in 'ConstAppError'.
+      -- ^ An attempt to compute a constant application resulted in 'UnliftingError'.
     | BuiltinTermArgumentExpectedMachineError
-      -- ^ A builtin expected a term argument, but something else was received
+      -- ^ A builtin expected a term argument, but something else was received.
     | UnexpectedBuiltinTermArgumentMachineError
       -- ^ A builtin received a term argument when something else was expected
     | NonConstrScrutinized
     | MissingCaseBranch Word64
     deriving stock (Show, Eq, Functor, Generic)
     deriving anyclass (NFData)
 
--- | The type of errors (all of them) which can occur during evaluation
--- (some are used-caused, some are internal).
-data EvaluationError user internal
-    = InternalEvaluationError !internal
-      -- ^ Indicates bugs.
-    | UserEvaluationError !user
-      -- ^ Indicates user errors.
+{- | The type of errors that can occur during evaluation. There are two kinds of errors:
+
+1. Operational ones -- these are errors that are indicative of the _logic_ of the program being
+   wrong. For example, 'Error' was executed, 'tailList' was applied to an empty list or evaluation
+   ran out of gas.
+2. Structural ones -- these are errors that are indicative of the _structure_ of the program being
+   wrong. For example, a free variable was encountered during evaluation, or a non-function was
+   applied to an argument.
+
+On the chain both of these are just regular failures and we don't distinguish between them there:
+if a script fails, it fails, it doesn't matter what the reason was. However in the tests it does
+matter why the failure occurred: a structural error may indicate that the test was written
+incorrectly while an operational error may be entirely expected.
+
+In other words, operational errors are regular runtime errors and structural errors are \"runtime
+type errors\". Which means that evaluating an (erased) well-typed program should never produce a
+structural error, only an operational one. This creates a sort of \"runtime type system\" for UPLC
+and it would be great to stick to it and enforce in tests etc, but we currently don't. For example,
+a built-in function expecting a list but getting something else should throw a structural error,
+but currently it'll throw an operational one. This is something that we plan to improve upon in
+future.
+-}
+data EvaluationError operational structural
+    = OperationalEvaluationError !operational
+    | StructuralEvaluationError !structural
     deriving stock (Show, Eq, Functor, Generic)
     deriving anyclass (NFData)
 
@@ -80,47 +98,50 @@ mtraverse makeClassyPrisms
     , ''EvaluationError
     ]
 
-instance internal ~ MachineError fun => AsMachineError (EvaluationError user internal) fun where
-    _MachineError = _InternalEvaluationError
-instance AsUnliftingError internal => AsUnliftingError (EvaluationError user internal) where
-    _UnliftingError = _InternalEvaluationError . _UnliftingError
+instance structural ~ MachineError fun =>
+        AsMachineError (EvaluationError operational structural) fun where
+    _MachineError = _StructuralEvaluationError
+instance AsUnliftingError structural =>
+        AsUnliftingError (EvaluationError operational structural) where
+    _UnliftingError = _StructuralEvaluationError . _UnliftingError
 instance AsUnliftingError (MachineError fun) where
     _UnliftingError = _UnliftingMachineError
-instance AsEvaluationFailure user => AsEvaluationFailure (EvaluationError user internal) where
-    _EvaluationFailure = _UserEvaluationError . _EvaluationFailure
-
-type EvaluationException user internal =
-    ErrorWithCause (EvaluationError user internal)
-
-{- Note [Ignoring context in UserEvaluationError]
-The UserEvaluationError error has a term argument, but
-extractEvaluationResult just discards this and returns
-EvaluationFailure.  This means that, for example, if we use the `plc`
-command to execute a program containing a division by zero, plc exits
-silently without reporting that anything has gone wrong (but returning
-a non-zero exit code to the shell via `exitFailure`).  This is because
-UserEvaluationError is used in cases when a PLC program itself goes
-wrong (for example, a failure due to `(error)`, a failure during
-builtin evaluation, or exceeding the gas limit).  This is used to
-signal unsuccessful in validation and so is not regarded as a real
-error; in contrast, machine errors, typechecking failures,
-and so on are genuine errors and we report their context if available.
- -}
-
--- | Turn any 'UserEvaluationError' into an 'EvaluationFailure'.
+instance AsEvaluationFailure operational =>
+        AsEvaluationFailure (EvaluationError operational structural) where
+    _EvaluationFailure = _OperationalEvaluationError . _EvaluationFailure
+
+type EvaluationException operational structural =
+    ErrorWithCause (EvaluationError operational structural)
+
+{- Note [Ignoring context in OperationalEvaluationError]
+The 'OperationalEvaluationError' error has a term argument, but 'extractEvaluationResult' just
+discards this and returns 'EvaluationFailure'. This means that, for example, if we use the @plc@
+command to execute a program containing a division by zero, @plc@ exits silently without reporting
+that anything has gone wrong (but returning a non-zero exit code to the shell via 'exitFailure').
+This is because 'OperationalEvaluationError' is used in cases when a PLC program itself goes wrong
+(see the Haddocks of 'EvaluationError'). This is used to signal unsuccessful validation and so is
+not regarded as a real error; in contrast structural errors are genuine errors and we report their
+context if available.
+-}
+
+-- See Note [Ignoring context in OperationalEvaluationError].
+-- | Preserve the contents of an 'StructuralEvaluationError' as a 'Left' and turn an
+-- 'OperationalEvaluationError' into a @Right EvaluationFailure@.
 extractEvaluationResult
-    :: Either (EvaluationException user internal term) a
-    -> Either (ErrorWithCause internal term) (EvaluationResult a)
+    :: Either (EvaluationException operational structural term) a
+    -> Either (ErrorWithCause structural term) (EvaluationResult a)
 extractEvaluationResult (Right term) = Right $ EvaluationSuccess term
 extractEvaluationResult (Left (ErrorWithCause evalErr cause)) = case evalErr of
-    InternalEvaluationError err -> Left  $ ErrorWithCause err cause
-    UserEvaluationError _       -> Right $ EvaluationFailure
+    StructuralEvaluationError err -> Left  $ ErrorWithCause err cause
+    OperationalEvaluationError _  -> Right $ EvaluationFailure
 
-unsafeExtractEvaluationResult
+-- | Throw on a 'StructuralEvaluationError' and turn an 'OperationalEvaluationError' into an
+-- 'EvaluationFailure'.
+unsafeToEvaluationResult
     :: (PrettyPlc internal, PrettyPlc term, Typeable internal, Typeable term)
     => Either (EvaluationException user internal term) a
     -> EvaluationResult a
-unsafeExtractEvaluationResult = unsafeFromEither . extractEvaluationResult
+unsafeToEvaluationResult = unsafeFromEither . extractEvaluationResult
 
 instance (HasPrettyDefaults config ~ 'True, Pretty fun) =>
             PrettyBy config (MachineError fun) where
@@ -145,13 +166,7 @@ instance (HasPrettyDefaults config ~ 'True, Pretty fun) =>
 
 instance
         ( HasPrettyDefaults config ~ 'True
-        , PrettyBy config internal, Pretty user
-        ) => PrettyBy config (EvaluationError user internal) where
-    prettyBy config (InternalEvaluationError err) = fold
-        [ "error:", hardline
-        , prettyBy config err
-        ]
-    prettyBy _      (UserEvaluationError err) = fold
-        [ "User error:", hardline
-        , pretty err
-        ]
+        , Pretty operational, PrettyBy config structural
+        ) => PrettyBy config (EvaluationError operational structural) where
+    prettyBy _      (OperationalEvaluationError operational) = pretty operational
+    prettyBy config (StructuralEvaluationError structural)   = prettyBy config structural
diff --git a/plutus-core/plutus-core/src/PlutusCore/TypeCheck.hs b/plutus-core/plutus-core/src/PlutusCore/TypeCheck.hs
@@ -36,9 +36,9 @@ import PlutusCore.Quote
 import PlutusCore.Rename
 import PlutusCore.TypeCheck.Internal
 
--- | The constraint for built-in types/functions are kind/type-checkable.
+-- | The constraint for built-in types\/functions are kind\/type-checkable.
 --
--- We keep this separate from 'MonadKindCheck'/'MonadTypeCheck', because those mainly constrain the
+-- We keep this separate from 'MonadKindCheck'\/'MonadTypeCheck', because those mainly constrain the
 -- monad and 'Typecheckable' constraints only the builtins. In particular useful when the monad gets
 -- instantiated and builtins don't. Another reason is that 'Typecheckable' is not required during
 -- type checking, since it's only needed for computing 'BuiltinTypes', which is passed as a regular

diff --git a/plutus-core/plutus-core/test/Evaluation/Machines.hs b/plutus-core/plutus-core/test/Evaluation/Machines.hs
@@ -20,10 +20,12 @@ import Test.Tasty
 import Test.Tasty.Hedgehog
 
 testMachine
-    :: (uni ~ DefaultUni, fun ~ DefaultFun, PrettyPlc internal)
+    :: (uni ~ DefaultUni, fun ~ DefaultFun, PrettyPlc structural)
     => String
     -> (Term TyName Name uni fun () ->
-           Either (EvaluationException user internal (Term TyName Name uni fun ())) (Term TyName Name uni fun ()))
+           Either
+               (EvaluationException operational structural (Term TyName Name uni fun ()))
+               (Term TyName Name uni fun ()))
     -> TestTree
 testMachine machine eval =
     testGroup machine $ fromInterestingTermGens $ \name ->

diff --git a/plutus-core/plutus-ir/test/PlutusIR/Compiler/Recursion/errorBinding.golden b/plutus-core/plutus-ir/test/PlutusIR/Compiler/Recursion/errorBinding.golden
@@ -1,4 +1,4 @@
-An error has occurred:  User error:
+An error has occurred:
 The machine terminated because of an error, either from a built-in function or from an explicit use of 'error'.
 Final budget: ({cpu: 100
 | mem: 100})

diff --git a/plutus-core/plutus-ir/test/PlutusIR/Transform/StrictLetRec/Tests/Lib.hs b/plutus-core/plutus-ir/test/PlutusIR/Transform/StrictLetRec/Tests/Lib.hs
@@ -29,8 +29,8 @@ import PlutusIR.Compiler qualified as PIR
 import PlutusIR.Core qualified as PIR
 import PlutusIR.Parser (pTerm)
 import UntypedPlutusCore.Core qualified as UPLC
-import UntypedPlutusCore.Evaluation.Machine.Cek (CekValue, EvaluationResult (..), logEmitter,
-                                                 unsafeEvaluateCek)
+import UntypedPlutusCore.Evaluation.Machine.Cek (CekValue, EvaluationResult (..), evaluateCek,
+                                                 logEmitter, unsafeToEvaluationResult)
 import UntypedPlutusCore.Evaluation.Machine.Cek.CekMachineCosts (CekMachineCosts)
 
 pirTermFromFile
@@ -66,7 +66,7 @@ compilePirProgramOrFail pirProgram = do
     & runExceptT
     >>= \case
       Left (er :: PIR.Error DefaultUni DefaultFun (Provenance ())) -> fail $ show er
-      Right p -> pure (void p)
+      Right p                                                      -> pure (void p)
 
 compileTplcProgramOrFail
   :: (MonadFail m)
@@ -83,7 +83,8 @@ evaluateUplcProgramWithTraces
   :: UPLC.Program Name DefaultUni DefaultFun ()
   -> (EvaluationResult (UPLC.Term Name DefaultUni DefaultFun ()), [Text])
 evaluateUplcProgramWithTraces uplcProg =
-  unsafeEvaluateCek logEmitter machineParameters (uplcProg ^. UPLC.progTerm)
+  first unsafeToEvaluationResult $
+    evaluateCek logEmitter machineParameters (uplcProg ^. UPLC.progTerm)
  where
   costModel :: CostModel CekMachineCosts BuiltinCostModel =
     CostModel defaultCekMachineCosts defaultBuiltinCostModel
@@ -102,5 +103,5 @@ defaultCompilationCtx = do
 handlePirErrorByFailing
   :: (Pretty ann, MonadFail m) => Either (PIR.Error DefaultUni DefaultFun ann) a -> m a
 handlePirErrorByFailing = \case
-  Left e -> fail $ show e
+  Left e  -> fail $ show e
   Right x -> pure x
diff --git a/plutus-core/testlib/PlutusCore/Generators/Hedgehog/Test.hs b/plutus-core/testlib/PlutusCore/Generators/Hedgehog/Test.hs
@@ -95,11 +95,11 @@ sampleProgramValueGolden folder name genTerm = do
 propEvaluate
     :: ( uni ~ DefaultUni, fun ~ DefaultFun
        , KnownTypeAst TyName uni a, MakeKnown (Term TyName Name uni fun ()) a
-       , PrettyPlc internal
+       , PrettyPlc structural
        )
     => (Term TyName Name uni fun () ->
            Either
-            (EvaluationException user internal (Term TyName Name uni fun ()))
+            (EvaluationException operational structural (Term TyName Name uni fun ()))
             (Term TyName Name uni fun ()))
        -- ^ An evaluator.
     -> TermGen a  -- ^ A term/value generator.

diff --git a/plutus-core/testlib/PlutusCore/Generators/Hedgehog/TypeEvalCheck.hs b/plutus-core/testlib/PlutusCore/Generators/Hedgehog/TypeEvalCheck.hs
@@ -94,11 +94,11 @@ type TypeEvalCheckM uni fun = Either (TypeEvalCheckError uni fun)
 typeEvalCheckBy
     :: ( uni ~ DefaultUni, fun ~ DefaultFun
        , KnownTypeAst TyName uni a, MakeKnown (Term TyName Name uni fun ()) a
-       , PrettyPlc internal
+       , PrettyPlc structural
        )
     => (Term TyName Name uni fun () ->
            Either
-               (EvaluationException user internal (Term TyName Name uni fun ()))
+               (EvaluationException operational structural (Term TyName Name uni fun ()))
                (Term TyName Name uni fun ()))
        -- ^ An evaluator.
     -> TermOf (Term TyName Name uni fun ()) a