diff --git a/plutus-core/changelog.d/20240523_124004_koz.ross_bitwise_2.md b/plutus-core/changelog.d/20240523_124004_koz.ross_bitwise_2.md new file mode 100644 index 00000000000..d4cded39098 --- /dev/null +++ b/plutus-core/changelog.d/20240523_124004_koz.ross_bitwise_2.md @@ -0,0 +1,39 @@ + + + +### Added + +- Implementation and tests for primitive operations in [this + CIP](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-XXXX/CIP-XXXX.md) + +### Changed + +- Rename `ReplicateByteString` to `ReplicateByte` (and similarly for denotation) + + + + diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index 91948b911c1..02313c2b80e 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -92,8 +92,7 @@ library PlutusCore.Analysis.Definitions PlutusCore.Annotation PlutusCore.Arity - PlutusCore.Bitwise.Convert - PlutusCore.Bitwise.Logical + PlutusCore.Bitwise PlutusCore.Builtin PlutusCore.Builtin.Debug PlutusCore.Builtin.Elaborate @@ -417,6 +416,7 @@ test-suite untyped-plutus-core-test DeBruijn.Spec DeBruijn.UnDeBruijnify Evaluation.Builtins + Evaluation.Builtins.Bitwise Evaluation.Builtins.BLS12_381 Evaluation.Builtins.BLS12_381.TestClasses Evaluation.Builtins.BLS12_381.Utils @@ -430,6 +430,7 @@ test-suite untyped-plutus-core-test Evaluation.Debug Evaluation.FreeVars Evaluation.Golden + Evaluation.Helpers Evaluation.Machines Evaluation.Regressions Flat.Spec diff --git a/plutus-core/plutus-core/src/PlutusCore/Bitwise.hs b/plutus-core/plutus-core/src/PlutusCore/Bitwise.hs new file mode 100644 index 00000000000..7ffae07dbf6 --- /dev/null +++ b/plutus-core/plutus-core/src/PlutusCore/Bitwise.hs @@ -0,0 +1,1168 @@ +-- editorconfig-checker-disable-file + +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE OverloadedStrings #-} + +-- | Implementations for CIP-121, CIP-122 and CIP-123. Grouped because they all operate on +-- 'ByteString's, and require similar functionality. +module PlutusCore.Bitwise ( + -- * Wrappers + integerToByteStringWrapper, + byteStringToIntegerWrapper, + -- * Implementation details + IntegerToByteStringError (..), + integerToByteStringMaximumOutputLength, + integerToByteString, + byteStringToInteger, + andByteString, + orByteString, + xorByteString, + complementByteString, + readBit, + writeBits, + replicateByte, + shiftByteString, + rotateByteString, + countSetBits, + findFirstSetBit + ) where + +import PlutusCore.Builtin (BuiltinResult, emit) +import PlutusCore.Evaluation.Result (evaluationFailure) + +import ByteString.StrictBuilder (Builder) +import ByteString.StrictBuilder qualified as Builder +import Control.Exception (Exception, throw, try) +import Control.Monad (guard, unless, when) +import Data.Bits (unsafeShiftL, unsafeShiftR, (.|.)) +import Data.Bits qualified as Bits +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal qualified as BSI +import Data.Foldable (for_, traverse_) +import Data.Text (pack) +import Data.Word (Word64, Word8) +import Foreign.Marshal.Utils (copyBytes, fillBytes) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import Foreign.Storable (peekByteOff, peekElemOff, pokeByteOff, pokeElemOff) +import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian)) +import GHC.Exts (Int (I#)) +import GHC.Integer.Logarithms (integerLog2#) +import GHC.IO.Unsafe (unsafeDupablePerformIO) + +{- Note [Input length limitation for IntegerToByteString]. 
We make
+ `integerToByteString` fail if it is called with arguments which would cause
+ the length of the result to exceed about 8K bytes because the execution time
+ becomes difficult to predict accurately beyond this point (benchmarks on a
+ number of different machines show that the CPU time increases smoothly for
+ inputs up to about 8K then increases sharply, becoming chaotic after about
+ 14K). This restriction may be removed once a more efficient implementation
+ becomes available, which may happen when we no longer have to support GHC
+ 8.10. -}
+{- NB: if we do relax the length restriction then we will need two variants of
+ integerToByteString in Plutus Core so that we can continue to support the
+ current behaviour for old scripts.-}
+integerToByteStringMaximumOutputLength :: Integer
+integerToByteStringMaximumOutputLength = 8192
+
+{- Return the base 2 logarithm of an integer, returning 0 for inputs that aren't
+ strictly positive. This is essentially copied from GHC.Num.Integer, which
+ has integerLog2 but only in GHC >= 9.0. We should use the library function
+ instead when we stop supporting 8.10. -}
+integerLog2 :: Integer -> Int
+integerLog2 !i = I# (integerLog2# i)
+
+-- | Wrapper for 'integerToByteString' to make it more convenient to define as a builtin.
+integerToByteStringWrapper :: Bool -> Integer -> Integer -> BuiltinResult ByteString
+integerToByteStringWrapper endiannessArg lengthArg input
+ -- Check that the length is non-negative.
+ | lengthArg < 0 = do
+ emit "integerToByteString: negative length argument"
+ emit $ "Length requested: " <> (pack . show $ lengthArg)
+ evaluationFailure
+ -- Check that the requested length does not exceed the limit. *NB*: if we remove the limit we'll
+ -- still have to make sure that the length fits into an Int.
+ | lengthArg > integerToByteStringMaximumOutputLength = do
+ emit . pack $ "integerToByteString: requested length is too long (maximum is "
+ ++ show integerToByteStringMaximumOutputLength
+ ++ " bytes)"
+ emit $ "Length requested: " <> (pack . show $ lengthArg)
+ evaluationFailure
+ -- If the requested length is zero (ie, an explicit output size is not
+ -- specified) we still have to make sure that the output won't exceed the size
+ -- limit. If the requested length is nonzero and less than the limit,
+ -- integerToByteString checks that the input fits.
+ | lengthArg == 0 -- integerLog2 n is one less than the number of significant bits in n
+ && fromIntegral (integerLog2 input) >= 8 * integerToByteStringMaximumOutputLength =
+ let bytesRequiredFor n = integerLog2 n `div` 8 + 1
+ -- ^ This gives 1 instead of 0 for n=0, but we'll never get that.
+ in do
+ emit . pack $ "integerToByteString: input too long (maximum is 2^"
+ ++ show (8 * integerToByteStringMaximumOutputLength)
+ ++ "-1)"
+ emit $ "Length required: " <> (pack . show $ bytesRequiredFor input)
+ evaluationFailure
+ | otherwise = let endianness = endiannessArgToByteOrder endiannessArg in
+ -- We use fromIntegral here, despite advice to the contrary in general when defining builtin
+ -- denotations. This is because, if we've made it this far, we know that overflow or truncation
+ -- are impossible: we've checked that whatever we got given fits inside a (non-negative) Int.
+ case integerToByteString endianness (fromIntegral lengthArg) input of
+ Left err -> case err of
+ NegativeInput -> do
+ emit "integerToByteString: cannot convert negative Integer"
+ -- This does work proportional to the size of input.
However, we're in a failing case
+ -- anyway, and the user's paid for work proportional to this size in any case.
+ emit $ "Input: " <> (pack . show $ input)
+ evaluationFailure
+ NotEnoughDigits -> do
+ emit "integerToByteString: cannot represent Integer in given number of bytes"
+ -- This does work proportional to the size of input. However, we're in a failing case
+ -- anyway, and the user's paid for work proportional to this size in any case.
+ emit $ "Input: " <> (pack . show $ input)
+ emit $ "Bytes requested: " <> (pack . show $ lengthArg)
+ evaluationFailure
+ Right result -> pure result
+
+-- | Wrapper for 'byteStringToInteger' to make it more convenient to define as a builtin.
+byteStringToIntegerWrapper ::
+ Bool -> ByteString -> Integer
+byteStringToIntegerWrapper statedEndiannessArg input =
+ let endianness = endiannessArgToByteOrder statedEndiannessArg in
+ byteStringToInteger endianness input
+
+-- | Structured type to help indicate conversion errors.
+data IntegerToByteStringError =
+ NegativeInput |
+ NotEnoughDigits
+ deriving stock (Eq, Show)
+
+-- | Conversion from 'Integer' to 'ByteString', as per
+-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121).
+--
+-- For performance and clarity, the endianness argument uses
+-- 'ByteOrder', and the length argument is an 'Int'.
+integerToByteString :: ByteOrder -> Int -> Integer -> Either IntegerToByteStringError ByteString
+integerToByteString requestedByteOrder requestedLength input
+ | input < 0 = Left NegativeInput
+ | input == 0 = Right . BS.replicate requestedLength $ 0x00
+ -- We use manual specialization to ensure as few branches in loop bodies as
+ -- we can. See Note [Manual specialization] for details.
+ | requestedLength == 0 = Right . Builder.builderBytes $ case requestedByteOrder of
+ LittleEndian -> goLENoLimit mempty input
+ BigEndian -> goBENoLimit mempty input
+ | otherwise = do
+ let result = case requestedByteOrder of
+ LittleEndian -> goLELimit mempty input
+ BigEndian -> goBELimit mempty input
+ case result of
+ Nothing -> Left NotEnoughDigits
+ Just b -> Right . Builder.builderBytes $ b
+ where
+ goLELimit :: Builder -> Integer -> Maybe Builder
+ goLELimit acc remaining
+ | remaining == 0 = pure $ padLE acc
+ | otherwise = do
+ -- builderLength is constant time, so we don't track the length ourselves
+ guard (Builder.builderLength acc < requestedLength)
+ -- This allows extracting eight digits at once. See Note [Loop sectioning] for details on
+ -- why we do this. We also duplicate this code in several places: see Note [Manual
+ -- specialization] for why.
+ --
+ -- The code is basically equivalent to remaining `quotRem` 2^64, but more efficient. This
+ -- is for two reasons: firstly, GHC does not optimize divisions into shifts for Integer
+ -- (even if the divisor is constant), and secondly, the pair generated by `quotRem` costs
+ -- us as much as 15% performance, and GHC seems unable to eliminate it. Thus, we have to do
+ -- it like this instead.
+ let newRemaining = remaining `unsafeShiftR` 64
+ -- Given that remaining must be non-negative, fromInteger here effectively truncates to a
+ -- Word64, by retaining only the least-significant 8 bytes.
+ let digitGroup :: Word64 = fromInteger remaining + case newRemaining of + 0 -> finishLELimit acc digitGroup + _ -> goLELimit (acc <> Builder.storable digitGroup) newRemaining + finishLELimit :: Builder -> Word64 -> Maybe Builder + finishLELimit acc remaining + | remaining == 0 = pure $ padLE acc + | otherwise = do + guard (Builder.builderLength acc < requestedLength) + -- This is equivalent to 'remaining `quotRem` 256' followed by a conversion of the + -- remainder, but faster. This is similar to the larger example above, and we do it for + -- the same reasons. + let newRemaining = remaining `unsafeShiftR` 8 + let digit :: Word8 = fromIntegral remaining + finishLELimit (acc <> Builder.word8 digit) newRemaining + -- By separating the case where we don't need to concern ourselves with a + -- user-specified limit, we can avoid branching needlessly, or doing a + -- complex expression check on every loop. See Note [Manual specialization] + -- for why this matters. + goLENoLimit :: Builder -> Integer -> Builder + goLENoLimit acc remaining + | remaining == 0 = acc + | otherwise = let newRemaining = remaining `unsafeShiftR` 64 + digitGroup :: Word64 = fromInteger remaining + in case newRemaining of + 0 -> finishLENoLimit acc digitGroup + _ -> goLENoLimit (acc <> Builder.storable digitGroup) newRemaining + finishLENoLimit :: Builder -> Word64 -> Builder + finishLENoLimit acc remaining + | remaining == 0 = acc + | otherwise = + let newRemaining = remaining `unsafeShiftR` 8 + digit :: Word8 = fromIntegral remaining + in finishLENoLimit (acc <> Builder.word8 digit) newRemaining + padLE :: Builder -> Builder + padLE acc = let paddingLength = requestedLength - Builder.builderLength acc + in acc <> Builder.bytes (BS.replicate paddingLength 0x0) + -- We manually specialize the big-endian case: see Note [Manual specialization] for why. + goBELimit :: Builder -> Integer -> Maybe Builder + goBELimit acc remaining + | remaining == 0 = pure $ padBE acc + | otherwise = do + guard (Builder.builderLength acc < requestedLength) + let newRemaining = remaining `unsafeShiftR` 64 + let digitGroup :: Word64 = fromInteger remaining + case newRemaining of + 0 -> finishBELimit acc digitGroup + _ -> goBELimit (Builder.word64BE digitGroup <> acc) newRemaining + finishBELimit :: Builder -> Word64 -> Maybe Builder + finishBELimit acc remaining + | remaining == 0 = pure $ padBE acc + | otherwise = do + guard (Builder.builderLength acc < requestedLength) + let newRemaining = remaining `unsafeShiftR` 8 + let digit = fromIntegral remaining + finishBELimit (Builder.word8 digit <> acc) newRemaining + goBENoLimit :: Builder -> Integer -> Builder + goBENoLimit acc remaining + | remaining == 0 = acc + | otherwise = let newRemaining = remaining `unsafeShiftR` 64 + digitGroup = fromInteger remaining + in case newRemaining of + 0 -> finishBENoLimit acc digitGroup + _ -> goBENoLimit (Builder.word64BE digitGroup <> acc) newRemaining + finishBENoLimit :: Builder -> Word64 -> Builder + finishBENoLimit acc remaining + | remaining == 0 = acc + | otherwise = let newRemaining = remaining `unsafeShiftR` 8 + digit = fromIntegral remaining + in finishBENoLimit (Builder.word8 digit <> acc) newRemaining + padBE :: Builder -> Builder + padBE acc = let paddingLength = requestedLength - Builder.builderLength acc in + Builder.bytes (BS.replicate paddingLength 0x0) <> acc + +-- | Conversion from 'ByteString' to 'Integer', as per +-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121). 
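+--
+-- For example, @byteStringToInteger BigEndian (BS.pack [0x01, 0x94])@ and
+-- @byteStringToInteger LittleEndian (BS.pack [0x94, 0x01])@ both evaluate to @404@;
+-- padding zeroes on the non-significant end are ignored, so
+-- @byteStringToInteger BigEndian (BS.pack [0x00, 0x01, 0x94])@ is @404@ as well.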
+-- +-- For clarity, the stated endianness argument uses 'ByteOrder'. +byteStringToInteger :: ByteOrder -> ByteString -> Integer + -- We use manual specialization to ensure as few branches in loop bodies as we can. See Note + -- [Manual specialization] for details. +byteStringToInteger statedByteOrder input = case statedByteOrder of + -- Since padding bytes in the most-significant-last representation go at + -- the end of the input, we can skip decoding them, as they won't affect + -- the result in any way. + LittleEndian -> case BS.findIndexEnd (/= 0x00) input of + -- If there are no nonzero bytes, it must be zero. + Nothing -> 0 + Just end -> goLE 0 end 0 + -- Since padding bytes in the most-significant-first representation go at + -- the beginning of the input, we can skip decoding them, as they won't + -- affect the result in any way. + BigEndian -> case BS.findIndex (/= 0x00) input of + Nothing -> 0 + Just end -> goBE 0 end 0 (BS.length input - 1) + where + -- Like with toByteString, we use loop sectioning to decode eight digits at once. See Note [Loop + -- sectioning] for why we do this. + goLE :: Integer -> Int -> Int -> Integer + goLE acc limit ix + | ix <= (limit - 7) = + let digitGroup = read64LE ix + -- Same as ix * 8, but faster. GHC might already do this optimization, but we may as + -- well be sure. + shift = ix `unsafeShiftL` 3 + newIx = ix + 8 + -- We use unsafeShiftL to move a group of eight digits into the right position in + -- the result, then combine with the accumulator. This is equivalent to a + -- multiplication by 2^64*k, but significantly faster, as GHC doesn't optimize + -- such multiplications into shifts for Integers. + newAcc = acc + fromIntegral digitGroup `unsafeShiftL` shift + in goLE newAcc limit newIx + | otherwise = finishLE acc limit ix + finishLE :: Integer -> Int -> Int -> Integer + finishLE acc limit ix + | ix > limit = acc + | otherwise = + let digit = BS.index input ix + shift = ix `unsafeShiftL` 3 + newIx = ix + 1 + -- Similarly to before, we use unsafeShiftL to move a single digit into the right + -- position in the result. + newAcc = acc + fromIntegral digit `unsafeShiftL` shift + in finishLE newAcc limit newIx + -- Technically, ByteString does not allow reading of anything bigger than a single byte. + -- However, because ByteStrings are counted arrays, caching already brings in adjacent bytes, + -- which makes fetching them quite cheap. Additionally, GHC appears to optimize this into a + -- block read of 64 bits at once, which saves memory movement. See Note [Superscalarity and + -- caching] for details of why this matters. + read64LE :: Int -> Word64 + read64LE startIx = + fromIntegral (BS.index input startIx) + .|. (fromIntegral (BS.index input (startIx + 1)) `unsafeShiftL` 8) + .|. (fromIntegral (BS.index input (startIx + 2)) `unsafeShiftL` 16) + .|. (fromIntegral (BS.index input (startIx + 3)) `unsafeShiftL` 24) + .|. (fromIntegral (BS.index input (startIx + 4)) `unsafeShiftL` 32) + .|. (fromIntegral (BS.index input (startIx + 5)) `unsafeShiftL` 40) + .|. (fromIntegral (BS.index input (startIx + 6)) `unsafeShiftL` 48) + .|. (fromIntegral (BS.index input (startIx + 7)) `unsafeShiftL` 56) + -- We manually specialize the big-endian cases: see Note [Manual specialization] for why. + -- + -- In the big-endian case, shifts and indexes change in different ways: indexes start _high_ + -- and _reduce_, but shifts start _low_ and rise. This is different to the little-endian case, + -- where both start low and rise. 
Thus, we track the index and shift separately in the
+ -- big-endian case: it makes the adjustments easier, and doesn't really change anything, as if
+ -- we wanted to compute the shift, we'd have to pass an offset argument anyway.
+ goBE :: Integer -> Int -> Int -> Int -> Integer
+ goBE acc limit shift ix
+ | ix >= (limit + 7) =
+ let digitGroup = read64BE ix
+ newShift = shift + 64
+ newIx = ix - 8
+ newAcc = acc + fromIntegral digitGroup `unsafeShiftL` shift
+ in goBE newAcc limit newShift newIx
+ | otherwise = finishBE acc limit shift ix
+ finishBE :: Integer -> Int -> Int -> Int -> Integer
+ finishBE acc limit shift ix
+ | ix < limit = acc
+ | otherwise =
+ let digit = BS.index input ix
+ newShift = shift + 8
+ newIx = ix - 1
+ newAcc = acc + fromIntegral digit `unsafeShiftL` shift
+ in finishBE newAcc limit newShift newIx
+ read64BE :: Int -> Word64
+ read64BE endIx =
+ fromIntegral (BS.index input endIx)
+ .|. (fromIntegral (BS.index input (endIx - 1)) `unsafeShiftL` 8)
+ .|. (fromIntegral (BS.index input (endIx - 2)) `unsafeShiftL` 16)
+ .|. (fromIntegral (BS.index input (endIx - 3)) `unsafeShiftL` 24)
+ .|. (fromIntegral (BS.index input (endIx - 4)) `unsafeShiftL` 32)
+ .|. (fromIntegral (BS.index input (endIx - 5)) `unsafeShiftL` 40)
+ .|. (fromIntegral (BS.index input (endIx - 6)) `unsafeShiftL` 48)
+ .|. (fromIntegral (BS.index input (endIx - 7)) `unsafeShiftL` 56)
+
+endiannessArgToByteOrder :: Bool -> ByteOrder
+endiannessArgToByteOrder b = if b then BigEndian else LittleEndian
+
+{- Note [Binary bitwise operation implementation and manual specialization]
+
+ All of the 'binary' bitwise operations (namely `andByteString`,
+ `orByteString` and `xorByteString`) operate similarly:
+
+ 1. Decide which of their two `ByteString` arguments determines the length
+ of the result. For padding semantics, this is the _longer_ argument,
+ whereas for truncation semantics, it's the _shorter_ one. If both
+ `ByteString` arguments have identical length, it doesn't matter which we
+ choose.
+ 2. Copy the choice made in step 1 into a fresh mutable buffer.
+ 3. Traverse over each byte of the argument _not_ chosen in step 1, and
+ combine each of those bytes with the byte at the corresponding index of
+ the fresh mutable buffer from step 2 (`.&.` for `andByteString`,
+ `.|.` for `orByteString`, `xor` for `xorByteString`).
+
+ We also make use of loop sectioning to optimize this operation: see Note
+ [Loop sectioning] explaining why we do this. Fundamentally, this doesn't
+ change the logic of the operation, but means that step 3 is split into
+ two smaller sub-steps: we first work 8 bytes at a time, then one byte at a
+ time to finish up if necessary. Other than the choice of 'combining
+ operation', the structure of the computation is the same, which suggests that
+ we want a helper function with a signature like
+
+ helper1 ::
+ (Word64 -> Word64 -> Word64) ->
+ (Word8 -> Word8 -> Word8) ->
+ ByteString ->
+ ByteString ->
+ Int ->
+ ByteString
+
+ or possibly (to avoid duplicate argument passing) like
+
+ helper2 ::
+ (forall (a :: Type) . Bits a => a -> a -> a) ->
+ ByteString ->
+ ByteString ->
+ Int ->
+ ByteString
+
+ This would allow us to share all this logic, and have each of the 'top-level'
+ operations just dispatch to either of the helpers with the appropriate
+ function argument(s). Instead, we chose to write a manual copy of this logic
+ for each of the 'top-level' operations, substituting only the 'combining
+ operation'.
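+
+ For illustration, with `helper2` in hand, each top-level operation would
+ reduce to a dispatch along these lines (a sketch of the rejected design,
+ not code that exists in this module):
+
+ andByteString shouldPad bs1 bs2 =
+ let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1)
+ (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer)
+ in helper2 (Bits..&.) toCopy toTraverse (BS.length shorter)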
+
+ We made this choice as any design based on either `helper1` or `helper2` is
+ significantly slower (at least 50% worse, and the penalty _percentage_ grows
+ with argument size). While `helper2` is significantly more penalizing than
+ `helper1`, even `helper1` reaches an almost threefold slowdown at the higher
+ input sizes we are interested in relative to the manual version we use here.
+ Due to the 'low-level' nature of Plutus Core primops, we consider these costs
+ unacceptable relative to the (small) benefits to code clarity and maintainability
+ any solution using either style of helper would provide.
+
+ The reason for `helper2` under-performing is unsurprising: any argument whose
+ type is rank-2 polymorphic with a dictionary constraint essentially acts as
+ a 'program template', which gets interpreted at runtime given some dictionary
+ for a `Bits` instance. GHC can do practically nothing to optimize this, as
+ there is no way to tell, for any given argument, _which_ definitions of an
+ instance would be required here, even if the set of operations we use is
+ finite, since any instance can make use of the full power of Haskell, which
+ essentially lands us in Rice's Theorem territory. For `helper1`, the reasons
+ are similar: it _must_ be able to work regardless of what functions (assuming
+ appropriate types) it is given, which means in general, GHC is forced to
+ compile mother-may-I-style code involving pointer chasing through those arguments at
+ runtime. This explains why the 'blowup' becomes worse with argument length.
+
+ While in theory inlining could help with at least the `helper1` case (
+ `helper2` is beyond that technique), it doesn't seem like GHC is able to
+ figure this out, even with `INLINE` placed on `helper1`.
+ -}
+
+-- | Bitwise logical AND, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
+{-# INLINEABLE andByteString #-}
+andByteString :: Bool -> ByteString -> ByteString -> ByteString
+andByteString shouldPad bs1 bs2 =
+ let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1)
+ (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer)
+ in go toCopy toTraverse (BS.length shorter)
+ where
+ go :: ByteString -> ByteString -> Int -> ByteString
+ go toCopy toTraverse traverseLen =
+ unsafeDupablePerformIO . BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) ->
+ BS.useAsCString toTraverse $ \traversePtr -> do
+ BSI.create copyLen $ \dstPtr -> do
+ copyBytes dstPtr (castPtr copyPtr) copyLen
+ let (bigStrides, littleStrides) = traverseLen `quotRem` 8
+ let offset = bigStrides * 8
+ let bigDstPtr :: Ptr Word64 = castPtr dstPtr
+ let bigTraversePtr :: Ptr Word64 = castPtr traversePtr
+ for_ [0 .. bigStrides - 1] $ \i -> do
+ w64_1 <- peekElemOff bigDstPtr i
+ w64_2 <- peekElemOff bigTraversePtr i
+ pokeElemOff bigDstPtr i $ w64_1 Bits..&. w64_2
+ let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset
+ let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset
+ for_ [0 .. littleStrides - 1] $ \i -> do
+ w8_1 <- peekElemOff smallDstPtr i
+ w8_2 <- peekElemOff smallTraversePtr i
+ pokeElemOff smallDstPtr i $ w8_1 Bits..&. w8_2
+
+-- | Bitwise logical OR, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122).
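+--
+-- For example, under truncation semantics, @orByteString False (BS.pack [0x0F, 0x00]) (BS.pack [0xF0])@
+-- evaluates to @BS.pack [0xFF]@, while under padding semantics,
+-- @orByteString True (BS.pack [0x0F, 0x00]) (BS.pack [0xF0])@ evaluates to @BS.pack [0xFF, 0x00]@.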
+{-# INLINEABLE orByteString #-} +orByteString :: Bool -> ByteString -> ByteString -> ByteString +orByteString shouldPad bs1 bs2 = + let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1) + (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer) + in go toCopy toTraverse (BS.length shorter) + where + go :: ByteString -> ByteString -> Int -> ByteString + go toCopy toTraverse traverseLen = + unsafeDupablePerformIO . BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) -> + BS.useAsCString toTraverse $ \traversePtr -> do + BSI.create copyLen $ \dstPtr -> do + copyBytes dstPtr (castPtr copyPtr) copyLen + let (bigStrides, littleStrides) = traverseLen `quotRem` 8 + let offset = bigStrides * 8 + let bigDstPtr :: Ptr Word64 = castPtr dstPtr + let bigTraversePtr :: Ptr Word64 = castPtr traversePtr + for_ [0 .. bigStrides - 1] $ \i -> do + w64_1 <- peekElemOff bigDstPtr i + w64_2 <- peekElemOff bigTraversePtr i + pokeElemOff bigDstPtr i $ w64_1 Bits..|. w64_2 + let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset + let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset + for_ [0 .. littleStrides - 1] $ \i -> do + w8_1 <- peekElemOff smallDstPtr i + w8_2 <- peekElemOff smallTraversePtr i + pokeElemOff smallDstPtr i $ w8_1 Bits..|. w8_2 + +-- | Bitwise logical XOR, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122). +{-# INLINEABLE xorByteString #-} +xorByteString :: Bool -> ByteString -> ByteString -> ByteString +xorByteString shouldPad bs1 bs2 = + let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1) + (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer) + in go toCopy toTraverse (BS.length shorter) + where + go :: ByteString -> ByteString -> Int -> ByteString + go toCopy toTraverse traverseLen = + unsafeDupablePerformIO . BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) -> + BS.useAsCString toTraverse $ \traversePtr -> do + BSI.create copyLen $ \dstPtr -> do + copyBytes dstPtr (castPtr copyPtr) copyLen + let (bigStrides, littleStrides) = traverseLen `quotRem` 8 + let offset = bigStrides * 8 + let bigDstPtr :: Ptr Word64 = castPtr dstPtr + let bigTraversePtr :: Ptr Word64 = castPtr traversePtr + for_ [0 .. bigStrides - 1] $ \i -> do + w64_1 <- peekElemOff bigDstPtr i + w64_2 <- peekElemOff bigTraversePtr i + pokeElemOff bigDstPtr i $ Bits.xor w64_1 w64_2 + let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset + let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset + for_ [0 .. littleStrides - 1] $ \i -> do + w8_1 <- peekElemOff smallDstPtr i + w8_2 <- peekElemOff smallTraversePtr i + pokeElemOff smallDstPtr i $ Bits.xor w8_1 w8_2 + +-- | Bitwise logical complement, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122). +{-# INLINEABLE complementByteString #-} +complementByteString :: ByteString -> ByteString +complementByteString bs = unsafeDupablePerformIO . BS.useAsCStringLen bs $ \(srcPtr, len) -> do + -- We use loop sectioning here; see Note [Loop sectioning] as to why we do this + let (bigStrides, littleStrides) = len `quotRem` 8 + let offset = bigStrides * 8 + BSI.create len $ \dstPtr -> do + let bigSrcPtr :: Ptr Word64 = castPtr srcPtr + let bigDstPtr :: Ptr Word64 = castPtr dstPtr + for_ [0 .. bigStrides - 1] $ \i -> do + w64 <- peekElemOff bigSrcPtr i + pokeElemOff bigDstPtr i . 
Bits.complement $ w64
+ let smallSrcPtr :: Ptr Word8 = plusPtr srcPtr offset
+ let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset
+ for_ [0 .. littleStrides - 1] $ \i -> do
+ w8 <- peekElemOff smallSrcPtr i
+ pokeElemOff smallDstPtr i . Bits.complement $ w8
+
+-- | Bit read at index, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
+{-# INLINEABLE readBit #-}
+readBit :: ByteString -> Int -> BuiltinResult Bool
+readBit bs ix
+ | ix < 0 = do
+ emit "readBit: index out of bounds"
+ emit $ "Index: " <> (pack . show $ ix)
+ evaluationFailure
+ | ix >= len * 8 = do
+ emit "readBit: index out of bounds"
+ emit $ "Index: " <> (pack . show $ ix)
+ evaluationFailure
+ | otherwise = do
+ let (bigIx, littleIx) = ix `quotRem` 8
+ let flipIx = len - bigIx - 1
+ pure $ Bits.testBit (BS.index bs flipIx) littleIx
+ where
+ len :: Int
+ len = BS.length bs
+
+-- | Bulk bit write, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
+{-# INLINEABLE writeBits #-}
+writeBits :: ByteString -> [(Integer, Bool)] -> BuiltinResult ByteString
+writeBits bs changelist = case unsafeDupablePerformIO . try $ go of
+ Left (WriteBitsException i) -> do
+ emit "writeBits: index out of bounds"
+ emit $ "Index: " <> (pack . show $ i)
+ evaluationFailure
+ Right result -> pure result
+ where
+ -- This is written in a somewhat strange way. See Note [writeBits and
+ -- exceptions], which covers why we did this.
+ go :: IO ByteString
+ go = BS.useAsCString bs $ \srcPtr ->
+ BSI.create len $ \dstPtr -> do
+ copyBytes dstPtr (castPtr srcPtr) len
+ traverse_ (setAtIx dstPtr) changelist
+ len :: Int
+ len = BS.length bs
+ bitLen :: Integer
+ bitLen = fromIntegral len * 8
+ setAtIx :: Ptr Word8 -> (Integer, Bool) -> IO ()
+ setAtIx ptr (i, b)
+ | i < 0 = throw $ WriteBitsException i
+ | i >= bitLen = throw $ WriteBitsException i
+ | otherwise = do
+ let (bigIx, littleIx) = i `quotRem` 8
+ let flipIx = len - fromIntegral bigIx - 1
+ w8 :: Word8 <- peekByteOff ptr flipIx
+ let toWrite = if b
+ then Bits.setBit w8 . fromIntegral $ littleIx
+ else Bits.clearBit w8 . fromIntegral $ littleIx
+ pokeByteOff ptr flipIx toWrite
+
+-- | Byte replication, as per [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122)
+replicateByte :: Int -> Word8 -> BuiltinResult ByteString
+replicateByte len w8
+ | len < 0 = do
+ emit "replicateByte: negative length requested"
+ evaluationFailure
+ | otherwise = pure . BS.replicate len $ w8
+
+{- Note [Shift and rotation implementation]
+
+Both shifts and rotations work similarly: they effectively impose a 'write
+offset' to bits in the data argument, then write those bits to the result
+with this offset applied. The difference between them is in what should be
+done if the resulting offset index would fall out of bounds: shifts just
+discard the data (and fill whatever remains with zeroes), while rotations
+'wrap around' modularly. This operation is bit parallel by definition, thus
+theoretically making it amenable to the techniques described in Note [Loop
+sectioning].
+
+However, the naive way of doing this runs into a problem: the byte ordering
+on Tier 1 platforms inside `Word64` means that consecutive bit indexes
+according to CIP-122 don't remain that way. We could avoid this by using a
+byte flip followed by an adjustment in the opposite direction, then a byte flip
+back again.
However, this is a costly operation, and would also be extremely
+fiddly across stride boundaries, making both performance and implementation
+clarity suffer. Instead, we use a different observation, namely that both
+shifts and rotations on the same input are monoidally homomorphic into
+natural number addition (assuming the same 'direction' for shifts). Using
+this, combined with Euclidean division, we can decompose any shift or
+rotation by `i` into two consecutive shifts and rotations:
+
+1. A 'large' shift or rotation, by `div i 8`; and
+2. A 'small' shift or rotation, by `mod i 8`.
+
+For example, a shift by 11 bits decomposes into a whole-byte shift by one
+byte (`div 11 8 = 1`), followed by a small shift by three bits (`mod 11 8 = 3`).
+
+While on paper, this seems much less efficient (as our stride is smaller),
+we also observe that the 'large' shift moves around whole bytes, while also
+keeping consecutive bytes consecutive, assuming their bit indices remain
+in-bounds. This means that we can implement step 1 both simply and efficiently:
+
+* For shifts, we perform a partial copy of all the bytes whose bits remain
+ in-bounds, followed by clearing of whatever remains.
+* For rotations, we perform two partial copies: first of all the bytes whose
+ bits remain in-bounds, followed by whatever remains, at the 'opposite end'.
+
+These can make use of the bulk copying and clearing operations provided by the
+GHC runtime. Not only are these shorter and more readable, they are also _far_
+more efficient than anything we could do, as they rely on optimized C called
+via the runtime (meaning no FFI penalty). From our experiments, both with
+these operations, and others from CIP-122, we note that the cost of these is
+essentially constant up to about the size of 1-2 cache lines (64-128 bytes):
+since we anticipate smaller inputs are far more likely, this actually runs
+_faster_ than our proposed sectioning approach, while being easier to read
+and write.
+
+It is arguable that our approach forces 'double writing', as Step 2 may have
+to overwrite our work in Step 1. However, by avoiding the need to
+perform byte flips, as well as benefitting from the huge speedups gained
+from our split approach, this cost is essentially negligible, especially
+given that we can operate mutably throughout. We also have an additional
+benefit: if the requested rotation or shift happens to be an exact multiple
+of 8, we can be _much_ faster, as Step 2 becomes unnecessary in that case.
+-}
+
+-- | Shifts, as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+shiftByteString :: ByteString -> Int -> ByteString
+shiftByteString bs bitMove
+ | BS.null bs = bs
+ | bitMove == 0 = bs
+ | otherwise = unsafeDupablePerformIO . BS.useAsCString bs $ \srcPtr ->
+ BSI.create len $ \dstPtr -> do
+ -- To simplify our calculations, we work only with absolute values,
+ -- letting different functions control for direction, instead of
+ -- trying to unify the scheme for both positive and negative shifts.
+ let magnitude = abs bitMove
+ -- Instead of worrying about partial clearing, we just zero the entire
+ -- block of memory, as the cost is essentially negligible and saves us
+ -- a bunch of offset arithmetic.
+ fillBytes dstPtr 0x00 len
+ unless (magnitude >= bitLen) $ do
+ let (bigShift, smallShift) = magnitude `quotRem` 8
+ case signum bitMove of
+ (-1) -> negativeShift (castPtr srcPtr) dstPtr bigShift smallShift
+ _ -> positiveShift (castPtr srcPtr) dstPtr bigShift smallShift
+ where
+ len :: Int
+ !len = BS.length bs
+ bitLen :: Int
+ !bitLen = len * 8
+ negativeShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
+ negativeShift srcPtr dstPtr bigShift smallShift = do
+ let copyDstPtr = plusPtr dstPtr bigShift
+ let copyLen = len - bigShift
+ -- Since we already zeroed everything, we only do the partial copy.
+ copyBytes copyDstPtr srcPtr copyLen
+ when (smallShift > 0) $ do
+ -- When working with the small shift, we have to shift bits across
+ -- byte boundaries. Thus, we have to make sure that:
+ --
+ -- 1. We 'save' our first byte from being processed.
+ -- 2. We can 'select' the bits that would be shifted over the
+ -- boundary and apply them.
+ let !invSmallShift = 8 - smallShift
+ let !mask = 0xFF `Bits.unsafeShiftR` invSmallShift
+ for_ [len - 1, len - 2 .. len - copyLen + 1] $ \byteIx -> do
+ -- To handle shifts across byte boundaries, we have to 'read
+ -- backwards', mask off the relevant part, then recombine.
+ !(currentByte :: Word8) <- peekByteOff dstPtr byteIx
+ !(prevByte :: Word8) <- peekByteOff dstPtr (byteIx - 1)
+ let !prevOverflowBits = prevByte Bits..&. mask
+ let !newCurrentByte =
+ (currentByte `Bits.unsafeShiftR` smallShift)
+ Bits..|. (prevOverflowBits `Bits.unsafeShiftL` invSmallShift)
+ pokeByteOff dstPtr byteIx newCurrentByte
+ !(firstByte :: Word8) <- peekByteOff dstPtr (len - copyLen)
+ pokeByteOff dstPtr (len - copyLen) (firstByte `Bits.unsafeShiftR` smallShift)
+ -- This works similarly to `negativeShift` above, but in the opposite direction.
+ positiveShift :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
+ positiveShift srcPtr dstPtr bigShift smallShift = do
+ let copySrcPtr = plusPtr srcPtr bigShift
+ let copyLen = len - bigShift
+ copyBytes dstPtr copySrcPtr copyLen
+ when (smallShift > 0) $ do
+ let !invSmallShift = 8 - smallShift
+ let !mask = 0xFF `Bits.unsafeShiftL` invSmallShift
+ for_ [0, 1 .. copyLen - 2] $ \byteIx -> do
+ !(currentByte :: Word8) <- peekByteOff dstPtr byteIx
+ !(nextByte :: Word8) <- peekByteOff dstPtr (byteIx + 1)
+ let !nextOverflowBits = nextByte Bits..&. mask
+ let !newCurrentByte =
+ (currentByte `Bits.unsafeShiftL` smallShift)
+ Bits..|. (nextOverflowBits `Bits.unsafeShiftR` invSmallShift)
+ pokeByteOff dstPtr byteIx newCurrentByte
+ !(lastByte :: Word8) <- peekByteOff dstPtr (copyLen - 1)
+ pokeByteOff dstPtr (copyLen - 1) (lastByte `Bits.unsafeShiftL` smallShift)
+
+-- | Rotations, as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+rotateByteString :: ByteString -> Int -> ByteString
+rotateByteString bs bitMove
+ | BS.null bs = bs
+ | otherwise =
+ -- To save ourselves some trouble, we work only with absolute rotations
+ -- (letting argument sign handle dispatch to dedicated 'directional'
+ -- functions, like for shifts), and also simplify rotations larger than
+ -- the bit length to the equivalent value modulo the bit length, as
+ -- they're equivalent.
+ let !magnitude = abs bitMove
+ !reducedMagnitude = magnitude `rem` bitLen
+ in if reducedMagnitude == 0
+ then bs
+ else unsafeDupablePerformIO .
BS.useAsCString bs $ \srcPtr ->
+ BSI.create len $ \dstPtr -> do
+ let (bigRotation, smallRotation) = reducedMagnitude `quotRem` 8
+ case signum bitMove of
+ (-1) -> negativeRotate (castPtr srcPtr) dstPtr bigRotation smallRotation
+ _ -> positiveRotate (castPtr srcPtr) dstPtr bigRotation smallRotation
+ where
+ len :: Int
+ !len = BS.length bs
+ bitLen :: Int
+ !bitLen = len * 8
+ negativeRotate :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
+ negativeRotate srcPtr dstPtr bigRotate smallRotate = do
+ -- Two partial copies are needed here, unlike with shifts, because
+ -- there's no point zeroing our data, since it'll all be overwritten
+ -- with stuff from the input anyway.
+ let copyStartDstPtr = plusPtr dstPtr bigRotate
+ let copyStartLen = len - bigRotate
+ copyBytes copyStartDstPtr srcPtr copyStartLen
+ let copyEndSrcPtr = plusPtr srcPtr copyStartLen
+ copyBytes dstPtr copyEndSrcPtr bigRotate
+ when (smallRotate > 0) $ do
+ -- This works similarly to the small-shift case in `shiftByteString`.
+ let invSmallRotate = 8 - smallRotate
+ let !mask = 0xFF `Bits.unsafeShiftR` invSmallRotate
+ !(cloneLastByte :: Word8) <- peekByteOff dstPtr (len - 1)
+ for_ [len - 1, len - 2 .. 1] $ \byteIx -> do
+ !(currentByte :: Word8) <- peekByteOff dstPtr byteIx
+ !(prevByte :: Word8) <- peekByteOff dstPtr (byteIx - 1)
+ let !prevOverflowBits = prevByte Bits..&. mask
+ let !newCurrentByte =
+ (currentByte `Bits.unsafeShiftR` smallRotate)
+ Bits..|. (prevOverflowBits `Bits.unsafeShiftL` invSmallRotate)
+ pokeByteOff dstPtr byteIx newCurrentByte
+ !(firstByte :: Word8) <- peekByteOff dstPtr 0
+ let !lastByteOverflow = cloneLastByte Bits..&. mask
+ let !newLastByte =
+ (firstByte `Bits.unsafeShiftR` smallRotate)
+ Bits..|. (lastByteOverflow `Bits.unsafeShiftL` invSmallRotate)
+ pokeByteOff dstPtr 0 newLastByte
+ positiveRotate :: Ptr Word8 -> Ptr Word8 -> Int -> Int -> IO ()
+ positiveRotate srcPtr dstPtr bigRotate smallRotate = do
+ let copyStartSrcPtr = plusPtr srcPtr bigRotate
+ let copyStartLen = len - bigRotate
+ copyBytes dstPtr copyStartSrcPtr copyStartLen
+ let copyEndDstPtr = plusPtr dstPtr copyStartLen
+ copyBytes copyEndDstPtr srcPtr bigRotate
+ when (smallRotate > 0) $ do
+ let !invSmallRotate = 8 - smallRotate
+ let !mask = 0xFF `Bits.unsafeShiftL` invSmallRotate
+ !(cloneFirstByte :: Word8) <- peekByteOff dstPtr 0
+ for_ [0, 1 .. len - 2] $ \byteIx -> do
+ !(currentByte :: Word8) <- peekByteOff dstPtr byteIx
+ !(nextByte :: Word8) <- peekByteOff dstPtr (byteIx + 1)
+ let !nextOverflowBits = nextByte Bits..&. mask
+ let !newCurrentByte =
+ (currentByte `Bits.unsafeShiftL` smallRotate)
+ Bits..|. (nextOverflowBits `Bits.unsafeShiftR` invSmallRotate)
+ pokeByteOff dstPtr byteIx newCurrentByte
+ !(lastByte :: Word8) <- peekByteOff dstPtr (len - 1)
+ let !firstOverflowBits = cloneFirstByte Bits..&. mask
+ let !newLastByte =
+ (lastByte `Bits.unsafeShiftL` smallRotate)
+ Bits..|. (firstOverflowBits `Bits.unsafeShiftR` invSmallRotate)
+ pokeByteOff dstPtr (len - 1) newLastByte
+
+-- | Counting the number of set bits, as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+countSetBits :: ByteString -> Int
+countSetBits bs = unsafeDupablePerformIO . BS.useAsCString bs $ \srcPtr -> do
+ -- See Note [Loop sectioning] for details of why we
+ -- define this function the way it is. We make use of the fact that `popCount`
+ -- is bit-parallel, and has a constant-time implementation for `Word64` and `Word8`.
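+ -- As a quick illustration: for the two bytes 0x01 and 0x03, the result is
+ -- popCount 0x01 + popCount 0x03 = 1 + 2 = 3.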
+ let bigSrcPtr :: Ptr Word64 = castPtr srcPtr
+ let smallSrcPtr :: Ptr Word8 = plusPtr srcPtr offset
+ goBig bigSrcPtr smallSrcPtr 0 0
+ where
+ len :: Int
+ !len = BS.length bs
+ -- We do this as two separate bindings, for similar reasons as for
+ -- `integerToByteString`: we take a surprising hit to performance when
+ -- using a pair, even though eliminating it should be possible here.
+ bigStrides :: Int
+ !bigStrides = len `quot` 8
+ smallStrides :: Int
+ !smallStrides = len `rem` 8
+ offset :: Int
+ !offset = bigStrides * 8
+ goBig :: Ptr Word64 -> Ptr Word8 -> Int -> Int -> IO Int
+ goBig !bigSrcPtr !smallSrcPtr !acc !bigIx
+ | bigIx == bigStrides = goSmall smallSrcPtr acc 0
+ | otherwise = do
+ !w64 <- peekElemOff bigSrcPtr bigIx
+ goBig bigSrcPtr smallSrcPtr (acc + Bits.popCount w64) (bigIx + 1)
+ goSmall :: Ptr Word8 -> Int -> Int -> IO Int
+ goSmall !smallSrcPtr !acc !smallIx
+ | smallIx == smallStrides = pure acc
+ | otherwise = do
+ !w8 <- peekElemOff smallSrcPtr smallIx
+ goSmall smallSrcPtr (acc + Bits.popCount w8) (smallIx + 1)
+
+-- | Finding the first set bit's index, as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+findFirstSetBit :: ByteString -> Int
+findFirstSetBit bs = unsafeDupablePerformIO . BS.useAsCString bs $ \srcPtr -> do
+ let bigSrcPtr :: Ptr Word64 = castPtr srcPtr
+ goBig bigSrcPtr 0 (len - 8)
+ where
+ -- We implement this operation in a somewhat unusual way, to try and
+ -- benefit from bit parallelism, thus allowing loop sectioning as well:
+ -- see Note [Loop sectioning] as to why we choose to
+ -- do this.
+ --
+ -- Finding the first set bit is not (inherently) bit parallel, as there is
+ -- a clear 'horizontal dependency'. Thus, we instead 'localize' this
+ -- 'horizontal dependency' by noting that the following operations _are_
+ -- bit-parallel:
+ --
+ -- 1. Checking if all bits are zero
+ -- 2. Keeping an additive accumulator
+ --
+ -- Essentially, we begin by taking large steps through our data, checking
+ -- whether we only have zeroes. This can be done in strides of 64 bits at a
+ -- time, and every time we find that many zeroes, we keep track. After we
+ -- encounter a nonzero `Word64`, we 'step down' to `Word8`-sized steps,
+ -- continuing to count zero blocks the same way. Once we encounter a
+ -- non-zero `Word8`, we can resort to the specialized operation for
+ -- counting trailing zeroes from `Data.Bits`, and 'top up' our accumulated
+ -- count to produce the index we want. If we ever 'walk off the end', we
+ -- know that there's no way we could find any set bits and return -1.
+ --
+ -- This is complicated slightly by us having to walk the input backwards
+ -- instead of forwards, but due to the requirements of the CIP-122 bit
+ -- indexing scheme, we don't really have a choice here. This doesn't
+ -- affect the description above however: it just complicates the indexing
+ -- maths required.
+ goBig :: Ptr Word64 -> Int -> Int -> IO Int
+ goBig !bigSrcPtr !acc !byteIx
+ | byteIx >= 0 = do
+ !(w64 :: Word64) <- peekByteOff bigSrcPtr byteIx
+ -- In theory, we could use the same technique here as we do in
+ -- `goSmall`, namely count speculatively and then compare to 64.
+ -- However this is not possible for us, as the native byte ordering
+ -- on Tier 1 platforms does not keep consecutive bits _across_ bytes
+ -- consecutive, which would make this result unreliable.
While we
+ -- _could_ do a byte order flip before counting (from the opposite
+ -- end) to avoid this, the cost of this operation is much larger
+ -- than a comparison to zero, and would only benefit us _once_,
+ -- instead of once-per-stride. Thus, we instead use the approach
+ -- here.
+ if w64 == 0x0
+ then goBig bigSrcPtr (acc + 64) (byteIx - 8)
+ else goSmall (castPtr bigSrcPtr) acc (byteIx + 7)
+ | byteIx <= (-8) = pure (-1)
+ | otherwise = goSmall (castPtr bigSrcPtr) acc (8 + byteIx - 1)
+ goSmall :: Ptr Word8 -> Int -> Int -> IO Int
+ goSmall !smallSrcPtr !acc !byteIx
+ | byteIx < 0 = pure (-1)
+ | otherwise = do
+ !(w8 :: Word8) <- peekByteOff smallSrcPtr byteIx
+ -- Instead of redundantly first checking for a zero byte,
+ -- then counting, we speculatively count, relying on the behaviour of
+ -- `countTrailingZeros` that, on a zero byte, we get 8.
+ let !counted = Bits.countTrailingZeros w8
+ let !newAcc = acc + counted
+ if counted == 8
+ then goSmall smallSrcPtr newAcc (byteIx - 1)
+ else pure newAcc
+ len :: Int
+ !len = BS.length bs
+
+-- Helpers
+
+{- Note [writeBits and exceptions]
+
+ As `writeBits` allows us to pass a changelist argument of any length, we
+ potentially could have an out-of-bounds index anywhere in the list. As we
+ have to fail on such cases (and report them appropriately), we end up needing
+ _both_ IO (to do mutable things) _and_ a way to signal errors. We can
+ do this in two ways:
+
+ 1. Pre-scan the changelist for any out-of-bounds indexes, fail if we see any,
+ then apply the necessary changes if no out-of-bounds indexes are found.
+ 2. Speculatively allocate the new `ByteString`, then do the changes in the
+ changelist argument one at a time, failing as soon as we see an out-of-bounds
+ index.
+
+ Option 1 would require traversing the changelist argument twice, which is
+ undesirable; option 2 is therefore the more efficient choice. The
+ natural choice for option 2 would be something similar to `ExceptT Int IO`
+ (with the `Int` being an out-of-bounds index). However, we aren't able to do
+ this, as ultimately, `ByteString`s are implemented as `ForeignPtr`s, forcing
+ us to use the following function to interact with them, directly or not:
+
+ withForeignPtr :: forall (a :: Type) . ForeignPtr a -> (Ptr a -> IO b) -> IO b
+
+ Notably, the function argument produces a result of `IO b`, whereas we would
+ need `MonadIO m => m b` instead. This means that our _only_ choice is to
+ use the exception mechanism, either directly or via some wrappers like
+ `MonadUnliftIO`. While this is unusual, and arguably against the spirit of
+ the use of `IO` relative to `ByteString` construction, we don't have any other
+ choice. We decided to use the exception mechanism directly, as while
+ `MonadUnliftIO` is a bit cleaner, it ultimately ends up doing the same thing
+ anyway, and this method at least makes it clear what we're doing.
+
+ This doesn't pose any problems from the point of view of Plutus Core, as this
+ exception cannot 'leak'; we handle it entirely within `writeBits`, and no
+ other Plutus Core code can ever see it.
+-} +newtype WriteBitsException = WriteBitsException Integer + deriving stock (Eq, Show) + +instance Exception WriteBitsException + +{- Note [Manual specialization] +For both integerToByteString and byteStringToInteger, we have to perform very +similar operations, but with small variations: + +- Most-significant-first versus most-significant-last (for both) +- Whether we have a size limit or not (for integerToByteString) + +Additionally, loop sectioning (see Note [Loop sectioning]) requires us to have +separate 'big-stride' and 'small-stride' operations to ensure universality of +input handling. Lastly, we have several subroutines (digit extraction, for +instance) that may vary in similar ways. In such a case, generalization by +means of abstraction seems like a good idea, as the operations (and +subroutines) vary little. + +At the same time, to determine which variation of any given function (or +subroutine) we need, we only have to scrutinize the relevant argument(s) once: +these specifics (such as byte order) don't change during the course of the +operation. Thus, we want to make sure that these checks in the code are _also_ +performed only once, ideally at the beginning. + +However, if we write such operations naively as so: + +> subroutine byteOrder arg1 arg2 = case byteOrder of +> LittleEndian -> ... +> BigEndian -> ... + +the byteOrder argument will be scrutinized on each call of subroutine. This is +correct in general (as there is no guarantee that the argument will be stable). +Strangely, however, even in a case like this one: + +> mainRoutine byteOrder arg1 arg2 = ... +> where +> subroutine arg3 = case byteOrder of +> LittleEndian -> ... +> BigEndian -> ... + +GHC _still_ re-scrutinizes byteOrder in every call of subroutine! This penalty +can be somewhat lessened using a form similar to this: + +> mainRoutine byteOrder arg1 arg2 = ... +> where +> !subroutine = case byteOrder of +> LittleEndian -> \arg3 -> ... +> BigEndian -> \arg3 -> ... + +but this is _still_ between 20 and 30% worse than doing something like this: + +> mainRoutine byteOrder arg1 arg2 = case byteOrder of +> LittleEndian -> [code calling subroutineLE where needed] +> BigEndian -> [code calling subroutineBE where needed] +> where +> subroutineLE arg3 = ... +> subroutineBE arg3 = ... + +This form _ensures_ we scrutinize (and branch) only the number of times we have +to, and in a predictable place. Since these are backends for Plutus Core primops, +and performance is thus critical, we choose to use this manually-specialized form +for each combination of relevant arguments. While this is repetitive, and thus +also somewhat error-prone, the performance penalty for not doing this is +unacceptable. +-} + +{- Note [Loop sectioning] + +Several operations in this module (including binary logical operations, +`integerToByteString` and `byteStringToInteger`) effectively function as loops +over fixed-width binary chunks: these can be bytes (for logical operations), +digits (for conversions) or something else. These chunks have to be read, +written or both, and may also require processing using fixed-width, +constant-time operations over those chunks from the Haskell side, in some +cases effectively 'translating' these fixed-size operations into variable-width +equivalents over `ByteString`s. In all cases, this involves trafficking data +between memory and machine registers (as `ByteString`s and `Integer`s are both +wrappers around counted arrays), as well as the overheads of looping +(involving comparison and branches). 
This trafficking is necessary not only to
+move the memory around, but also to process it, as on modern architectures,
+data must first be moved into a register in order to do anything with it.
+
+However, on all architectures of interest (essentially, 64-bit Tier 1),
+general-purpose registers (GPRs henceforth) are 64 bits (or 8 bytes).
+Furthermore, the primary cost of moving data between memory and registers is
+having to overcome the 'memory wall': the exact amount of data being moved
+doesn't affect this very much. In addition to this, when we operate on single
+bytes, the remaining 56 bits of the GPR holding that data are essentially
+'wasted'. In the situation we have (namely, operating over arrays, whose data
+is adjacent in memory), we thus get two sources of inefficiency:
+
+* Despite paying the cost for a memory transfer, we transfer only one-eighth
+ the data we could; and
+* Despite transferring data from memory to registers, we utilize the register
+ at only one-eighth capacity.
+
+This essentially means we perform _eight times_ more iterations of the loop,
+and memory moves, than we need to!
+
+To avoid this, we use a technique known as _loop sectioning_.
+Effectively, this transforms our homogeneous loop (that always operates one byte at
+a time) into a heterogeneous loop: first, we operate on a larger section (called
+a _stride_) until we can no longer do this, and then we finish up using
+byte-at-a-time processing. Essentially, when given an input like this:
+
+[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]
+
+the homogeneous byte-at-a-time approach would process it like so:
+
+ _ _ _ _ _ _ _ _ _ _
+[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]
+
+for a total of 10 memory transfers and 10 loop spins, whereas a loop-sectioned
+approach with a stride of 8 would instead process like so:
+
+ ______________________________ _ _
+[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ]
+
+Giving us only _three_ memory transfers and _three_ loop spins instead. This
+effectively reduces our work by a factor of 8. In our cases, this is almost
+free, as there is no data processing to be done: all we need to do is copy
+from one place to another, essentially.
+
+This technique only benefits us because counted arrays are cache-friendly: see
+Note [Superscalarity and caching] for a longer explanation of this and why it
+matters.
+
+Further information:
+
+- Tier 1 GHC platform list:
+ https://gitlab.haskell.org/ghc/ghc/-/wikis/platforms#tier-1-platforms
+- Memory wall:
+ https://link.springer.com/referenceworkentry/10.1007/978-0-387-09766-4_234
+- Loop sectioning in more detail:
+ http://physics.ujep.cz/~zmoravec/prga/main_for/mergedProjects/optaps_for/common/optaps_vec_mine.htm
+-}
+
+{- Note [Superscalarity and caching]
+On modern architectures, in order to process data, it must first be moved from
+memory into a register. This operation has some cost (known as the 'memory wall'),
+which is largely independent of how much data gets moved (assuming the register
+can hold it): moving one byte, or a whole register's worth, costs about the same.
+To reduce this cost, CPU manufacturers have introduced _cache hierarchies_,
+which are designed to limit the cost of the wall, as long as the data access
+matches the cache's optimal usage pattern.
Thus, while an idealized view of
+the memory hierarchy is this:
+
+Registers
+---------
+Memory
+
+in reality, the view is more like this:
+
+Registers
+---------
+L1 cache
+---------
+L2 cache
+---------
+L3 cache (on some platforms)
+---------
+Memory
+
+Each 'higher' cache in the hierarchy is smaller, but faster, and when a memory
+fetch is requested in code, in addition to moving the requested data to a
+register, that data (plus some more) is moved into caches as well. The amount
+of data moved into cache (a _cache line_) is typically eight machine words on
+modern architectures (and this is definitely the case for all Tier 1 GHC platforms):
+for the cases concerning Plutus, that is 64 bytes. Therefore, if data we need
+soon after a fetch is _physically_ nearby, it won't need to be fetched from
+memory: instead, it would come from a cache, which is faster (by a considerable
+margin).
+
+To see how this can matter, consider the following ByteString:
+
+[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]
+
+The ByteString (being a counted array) has all of its data physically adjacent
+to each other. Suppose we wanted to fetch the byte at index 1 (second position).
+The naive view of what happens is like this:
+
+Registers: [b2] [ ] [ ] .... [ ]
+Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]
+
+Thus, it would appear that, if we wanted a different position's value, we would
+need to fetch from memory again. However, what _actually_ happens is more like this:
+
+Registers: [b2] [ ] [ ] .... [ ]
+L1 cache: [ b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]
+Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ]
+
+We note that b2, as well as its adjacent elements, were _all_ pulled into the L1
+cache. This can only work because all these elements are physically adjacent in
+memory. The improvement in performance from this cache use is _very_ non-trivial:
+an L1 cache is about 200 times faster than a memory access, and an L2 cache about
+20 times faster.
+
+To take further advantage of this, modern CPUs (and all Tier 1 GHC platforms have
+this capability) are _superscalar_. To explain what this means, let's consider the
+naive view of how CPUs execute instructions: namely, it is one-at-a-time, and
+synchronous. While CPUs must give the _appearance_ that they behave this way, in
+practice, CPU execution is very much asynchronous: due to the proliferation of ALUs
+on a single chip, having twice as many processing units is much cheaper than having
+processing units run twice as fast. Thus, if there are no data dependencies
+between instructions, CPUs can (and do!) execute them simultaneously, stalling to
+await results if a data dependency is detected. This can be done automatically
+using Tomasulo's algorithm, which ensures no conflicts with maximum throughput.
+
+Superscalarity interacts well with the cache hierarchy, as it makes data more
+easily available for processing, provided there is enough 'work to do', and no
+data dependencies. In our situation, most of what we do is data _movement_ from
+one memory location to another, which by its very nature lacks any data
+dependencies.
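+
+To make the sectioned-loop shape described here and in Note [Loop sectioning]
+concrete, the following is a minimal sketch (illustrative only: `foldlM` is from
+Data.Foldable, and none of these names appear in this module) of a stride-8
+traversal in the style of `countSetBits` above:
+
+ sectionedPopCount :: Ptr Word8 -> Int -> IO Int
+ sectionedPopCount ptr len = do
+ let (bigStrides, smallStrides) = len `quotRem` 8
+ let bigPtr :: Ptr Word64 = castPtr ptr
+ let smallPtr :: Ptr Word8 = plusPtr ptr (bigStrides * 8)
+ -- One memory transfer per 8 bytes while we can...
+ big <- foldlM (\acc i -> (acc +) . Bits.popCount <$> peekElemOff bigPtr i) 0 [0 .. bigStrides - 1]
+ -- ...then one transfer per byte for whatever remains.
+ foldlM (\acc i -> (acc +) . Bits.popCount <$> peekElemOff smallPtr i) big [0 .. smallStrides - 1]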
+ +Further references: + +- Numbers for cache and memory transfers: https://gist.github.com/jboner/2841832 +- Superscalarity: https://en.wikipedia.org/wiki/Superscalar_processor +- Tomasulo's algorithm: https://en.wikipedia.org/wiki/Tomasulo%27s_algorithm +-} diff --git a/plutus-core/plutus-core/src/PlutusCore/Bitwise/Convert.hs b/plutus-core/plutus-core/src/PlutusCore/Bitwise/Convert.hs deleted file mode 100644 index bd6ccd317eb..00000000000 --- a/plutus-core/plutus-core/src/PlutusCore/Bitwise/Convert.hs +++ /dev/null @@ -1,544 +0,0 @@ --- editorconfig-checker-disable-file - -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE MagicHash #-} -{-# LANGUAGE OverloadedStrings #-} - --- | Implementations for conversion primops from 'Integer' to 'ByteString' and back again. -module PlutusCore.Bitwise.Convert ( - -- Wrappers - integerToByteStringWrapper, - byteStringToIntegerWrapper, - -- Implementation details - IntegerToByteStringError(..), - integerToByteStringMaximumOutputLength, - integerToByteString, - byteStringToInteger - ) where - -import PlutusCore.Builtin (BuiltinResult, emit) -import PlutusCore.Evaluation.Result (evaluationFailure) - -import ByteString.StrictBuilder (Builder) -import ByteString.StrictBuilder qualified as Builder -import Control.Monad (guard) -import Data.Bits (unsafeShiftL, unsafeShiftR, (.|.)) -import Data.ByteString (ByteString) -import Data.ByteString qualified as BS -import Data.Text (pack) -import Data.Word (Word64, Word8) -import GHC.ByteOrder (ByteOrder (BigEndian, LittleEndian)) -import GHC.Exts (Int (I#)) -import GHC.Integer.Logarithms (integerLog2#) - -{- Note [Input length limitation for IntegerToByteString]. We make - `integerToByteString` fail if it is called with arguments which would cause - the length of the result to exceed about 8K bytes because the execution time - becomes difficult to predict accurately beyond this point (benchmarks on a - number of different machines show that the CPU time increases smoothly for - inputs up to about 8K then increases sharply, becoming chaotic after about - 14K). This restriction may be removed once a more efficient implementation - becomes available, which may happen when we no longer have to support GHC - 8.10. -} -{- NB: if we do relax the length restriction then we will need two variants of - integerToByteString in Plutus Core so that we can continue to support the - current behaviour for old scripts.-} -integerToByteStringMaximumOutputLength :: Integer -integerToByteStringMaximumOutputLength = 8192 - -{- Return the base 2 logarithm of an integer, returning 0 for inputs that aren't - strictly positive. This is essentially copied from GHC.Num.Integer, which - has integerLog2 but only in GHC >= 9.0. We should use the library function - instead when we stop supporting 8.10. -} -integerLog2 :: Integer -> Int -integerLog2 !i = I# (integerLog2# i) - --- | Wrapper for 'integerToByteString' to make it more convenient to define as a builtin. -integerToByteStringWrapper :: Bool -> Integer -> Integer -> BuiltinResult ByteString -integerToByteStringWrapper endiannessArg lengthArg input - -- Check that the length is non-negative. - | lengthArg < 0 = do - emit "integerToByteString: negative length argument" - emit $ "Length requested: " <> (pack . show $ input) - evaluationFailure - -- Check that the requested length does not exceed the limit. *NB*: if we remove the limit we'll - -- still have to make sure that the length fits into an Int. - | lengthArg > integerToByteStringMaximumOutputLength = do - emit . 
pack $ "integerToByteString: requested length is too long (maximum is " - ++ (show $ integerToByteStringMaximumOutputLength) - ++ " bytes)" - emit $ "Length requested: " <> (pack . show $ lengthArg) - evaluationFailure - -- If the requested length is zero (ie, an explicit output size is not - -- specified) we still have to make sure that the output won't exceed the size - -- limit. If the requested length is nonzero and less than the limit, - -- integerToByteString checks that the input fits. - | (lengthArg == 0 -- integerLog2 n is one less than the number of significant bits in n - && fromIntegral (integerLog2 input) >= 8 * integerToByteStringMaximumOutputLength) = - let bytesRequiredFor n = (integerLog2 n) `div` 8 + 1 - -- ^ This gives 1 instead of 0 for n=0, but we'll never get that. - in do - emit . pack $ "integerToByteString: input too long (maximum is 2^" - ++ (show (8 * integerToByteStringMaximumOutputLength)) - ++ "-1)" - emit $ "Length required: " <> (pack . show $ bytesRequiredFor input) - evaluationFailure - | otherwise = let endianness = endiannessArgToByteOrder endiannessArg in - -- We use fromIntegral here, despite advice to the contrary in general when defining builtin - -- denotations. This is because, if we've made it this far, we know that overflow or truncation - -- are impossible: we've checked that whatever we got given fits inside a (non-negative) Int. - case integerToByteString endianness (fromIntegral lengthArg) input of - Left err -> case err of - NegativeInput -> do - emit "integerToByteString: cannot convert negative Integer" - -- This does work proportional to the size of input. However, we're in a failing case - -- anyway, and the user's paid for work proportional to this size in any case. - emit $ "Input: " <> (pack . show $ input) - evaluationFailure - NotEnoughDigits -> do - emit "integerToByteString: cannot represent Integer in given number of bytes" - -- This does work proportional to the size of input. However, we're in a failing case - -- anyway, and the user's paid for work proportional to this size in any case. - emit $ "Input: " <> (pack . show $ input) - emit $ "Bytes requested: " <> (pack . show $ lengthArg) - evaluationFailure - Right result -> pure result - --- | Wrapper for 'byteStringToInteger' to make it more convenient to define as a builtin. -byteStringToIntegerWrapper :: - Bool -> ByteString -> Integer -byteStringToIntegerWrapper statedEndiannessArg input = - let endianness = endiannessArgToByteOrder statedEndiannessArg in - byteStringToInteger endianness input - --- | Structured type to help indicate conversion errors. -data IntegerToByteStringError = - NegativeInput | - NotEnoughDigits - deriving stock (Eq, Show) - --- | Conversion from 'Integer' to 'ByteString', as per --- [CIP-121](https://github.com/mlabs-haskell/CIPs/tree/koz/to-from-bytestring/CIP-0121). --- --- For performance and clarity, the endianness argument uses --- 'ByteOrder', and the length argument is an 'Int'. -integerToByteString :: ByteOrder -> Int -> Integer -> Either IntegerToByteStringError ByteString -integerToByteString requestedByteOrder requestedLength input - | input < 0 = Left NegativeInput - | input == 0 = Right . BS.replicate requestedLength $ 0x00 - -- We use manual specialization to ensure as few branches in loop bodies as - -- we can. See Note [Manual specialization] for details. - | requestedLength == 0 = Right . 
Builder.builderBytes $ case requestedByteOrder of - LittleEndian -> goLENoLimit mempty input - BigEndian -> goBENoLimit mempty input - | otherwise = do - let result = case requestedByteOrder of - LittleEndian -> goLELimit mempty input - BigEndian -> goBELimit mempty input - case result of - Nothing -> Left NotEnoughDigits - Just b -> Right . Builder.builderBytes $ b - where - goLELimit :: Builder -> Integer -> Maybe Builder - goLELimit acc remaining - | remaining == 0 = pure $ padLE acc - | otherwise = do - -- builderLength is constant time, so we don't track the length ourselves - guard (Builder.builderLength acc < requestedLength) - -- This allows extracting eight digits at once. See Note [Loop sectioning] for details on - -- why we do this. We also duplicate this code in several places: see Note [Manual - -- specialization] for why. - -- - -- The code is basically equivalent to remaining `quotRem` 2^64, but more efficient. This - -- is for two reasons: firstly, GHC does not optimize divisions into shifts for Integer - -- (even if the divisor is constant), and secondly, the pair generated by `quotRem` costs - -- us as much as 15% performance, and GHC seems unable to eliminate it. Thus, we have to do - -- it like this instead. - let newRemaining = remaining `unsafeShiftR` 64 - -- Given that remaining must be non-negative, fromInteger here effectively truncates to a - -- Word64, by retaining only the least-significant 8 bytes. - let digitGroup :: Word64 = fromInteger remaining - case newRemaining of - 0 -> finishLELimit acc digitGroup - _ -> goLELimit (acc <> Builder.storable digitGroup) newRemaining - finishLELimit :: Builder -> Word64 -> Maybe Builder - finishLELimit acc remaining - | remaining == 0 = pure $ padLE acc - | otherwise = do - guard (Builder.builderLength acc < requestedLength) - -- This is equivalent to 'remaining `quotRem` 256' followed by a conversion of the - -- remainder, but faster. This is similar to the larger example above, and we do it for - -- the same reasons. - let newRemaining = remaining `unsafeShiftR` 8 - let digit :: Word8 = fromIntegral remaining - finishLELimit (acc <> Builder.word8 digit) newRemaining - -- By separating the case where we don't need to concern ourselves with a - -- user-specified limit, we can avoid branching needlessly, or doing a - -- complex expression check on every loop. See Note [Manual specialization] - -- for why this matters. - goLENoLimit :: Builder -> Integer -> Builder - goLENoLimit acc remaining - | remaining == 0 = acc - | otherwise = let newRemaining = remaining `unsafeShiftR` 64 - digitGroup :: Word64 = fromInteger remaining - in case newRemaining of - 0 -> finishLENoLimit acc digitGroup - _ -> goLENoLimit (acc <> Builder.storable digitGroup) newRemaining - finishLENoLimit :: Builder -> Word64 -> Builder - finishLENoLimit acc remaining - | remaining == 0 = acc - | otherwise = - let newRemaining = remaining `unsafeShiftR` 8 - digit :: Word8 = fromIntegral remaining - in finishLENoLimit (acc <> Builder.word8 digit) newRemaining - padLE :: Builder -> Builder - padLE acc = let paddingLength = requestedLength - Builder.builderLength acc - in acc <> Builder.bytes (BS.replicate paddingLength 0x0) - -- We manually specialize the big-endian case: see Note [Manual specialization] for why.
- goBELimit :: Builder -> Integer -> Maybe Builder - goBELimit acc remaining - | remaining == 0 = pure $ padBE acc - | otherwise = do - guard (Builder.builderLength acc < requestedLength) - let newRemaining = remaining `unsafeShiftR` 64 - let digitGroup :: Word64 = fromInteger remaining - case newRemaining of - 0 -> finishBELimit acc digitGroup - _ -> goBELimit (Builder.word64BE digitGroup <> acc) newRemaining - finishBELimit :: Builder -> Word64 -> Maybe Builder - finishBELimit acc remaining - | remaining == 0 = pure $ padBE acc - | otherwise = do - guard (Builder.builderLength acc < requestedLength) - let newRemaining = remaining `unsafeShiftR` 8 - let digit = fromIntegral remaining - finishBELimit (Builder.word8 digit <> acc) newRemaining - goBENoLimit :: Builder -> Integer -> Builder - goBENoLimit acc remaining - | remaining == 0 = acc - | otherwise = let newRemaining = remaining `unsafeShiftR` 64 - digitGroup = fromInteger remaining - in case newRemaining of - 0 -> finishBENoLimit acc digitGroup - _ -> goBENoLimit (Builder.word64BE digitGroup <> acc) newRemaining - finishBENoLimit :: Builder -> Word64 -> Builder - finishBENoLimit acc remaining - | remaining == 0 = acc - | otherwise = let newRemaining = remaining `unsafeShiftR` 8 - digit = fromIntegral remaining - in finishBENoLimit (Builder.word8 digit <> acc) newRemaining - padBE :: Builder -> Builder - padBE acc = let paddingLength = requestedLength - Builder.builderLength acc in - Builder.bytes (BS.replicate paddingLength 0x0) <> acc - --- | Conversion from 'ByteString' to 'Integer', as per --- [CIP-121](https://github.com/mlabs-haskell/CIPs/tree/koz/to-from-bytestring/CIP-0121). --- --- For clarity, the stated endianness argument uses 'ByteOrder'. -byteStringToInteger :: ByteOrder -> ByteString -> Integer - -- We use manual specialization to ensure as few branches in loop bodies as we can. See Note - -- [Manual specialization] for details. -byteStringToInteger statedByteOrder input = case statedByteOrder of - -- Since padding bytes in the most-significant-last representation go at - -- the end of the input, we can skip decoding them, as they won't affect - -- the result in any way. - LittleEndian -> case BS.findIndexEnd (/= 0x00) input of - -- If there are no nonzero bytes, it must be zero. - Nothing -> 0 - Just end -> goLE 0 end 0 - -- Since padding bytes in the most-significant-first representation go at - -- the beginning of the input, we can skip decoding them, as they won't - -- affect the result in any way. - BigEndian -> case BS.findIndex (/= 0x00) input of - Nothing -> 0 - Just end -> goBE 0 end 0 (BS.length input - 1) - where - -- Like with toByteString, we use loop sectioning to decode eight digits at once. See Note [Loop - -- sectioning] for why we do this. - goLE :: Integer -> Int -> Int -> Integer - goLE acc limit ix - | ix <= (limit - 7) = - let digitGroup = read64LE ix - -- Same as ix * 8, but faster. GHC might already do this optimization, but we may as - -- well be sure. - shift = ix `unsafeShiftL` 3 - newIx = ix + 8 - -- We use unsafeShiftL to move a group of eight digits into the right position in - -- the result, then combine with the accumulator. This is equivalent to a - -- multiplication by 2^64*k, but significantly faster, as GHC doesn't optimize - -- such multiplications into shifts for Integers. 
- newAcc = acc + fromIntegral digitGroup `unsafeShiftL` shift - in goLE newAcc limit newIx - | otherwise = finishLE acc limit ix - finishLE :: Integer -> Int -> Int -> Integer - finishLE acc limit ix - | ix > limit = acc - | otherwise = - let digit = BS.index input ix - shift = ix `unsafeShiftL` 3 - newIx = ix + 1 - -- Similarly to before, we use unsafeShiftL to move a single digit into the right - -- position in the result. - newAcc = acc + fromIntegral digit `unsafeShiftL` shift - in finishLE newAcc limit newIx - -- Technically, ByteString does not allow reading of anything bigger than a single byte. - -- However, because ByteStrings are counted arrays, caching already brings in adjacent bytes, - -- which makes fetching them quite cheap. Additionally, GHC appears to optimize this into a - -- block read of 64 bits at once, which saves memory movement. See Note [Superscalarity and - -- caching] for details of why this matters. - read64LE :: Int -> Word64 - read64LE startIx = - fromIntegral (BS.index input startIx) - .|. (fromIntegral (BS.index input (startIx + 1)) `unsafeShiftL` 8) - .|. (fromIntegral (BS.index input (startIx + 2)) `unsafeShiftL` 16) - .|. (fromIntegral (BS.index input (startIx + 3)) `unsafeShiftL` 24) - .|. (fromIntegral (BS.index input (startIx + 4)) `unsafeShiftL` 32) - .|. (fromIntegral (BS.index input (startIx + 5)) `unsafeShiftL` 40) - .|. (fromIntegral (BS.index input (startIx + 6)) `unsafeShiftL` 48) - .|. (fromIntegral (BS.index input (startIx + 7)) `unsafeShiftL` 56) - -- We manually specialize the big-endian cases: see Note [Manual specialization] for why. - -- - -- In the big-endian case, shifts and indexes change in different ways: indexes start _high_ - -- and _reduce_, but shifts start _low_ and rise. This is different to the little-endian case, - -- where both start low and rise. Thus, we track the index and shift separately in the - -- big-endian case: it makes the adjustments easier, and doesn't really change anything, as if - -- we wanted to compute the shift, we'd have to pass an offset argument anyway. - goBE :: Integer -> Int -> Int -> Int -> Integer - goBE acc limit shift ix - | ix >= (limit + 7) = - let digitGroup = read64BE ix - newShift = shift + 64 - newIx = ix - 8 - newAcc = acc + fromIntegral digitGroup `unsafeShiftL` shift - in goBE newAcc limit newShift newIx - | otherwise = finishBE acc limit shift ix - finishBE :: Integer -> Int -> Int -> Int -> Integer - finishBE acc limit shift ix - | ix < limit = acc - | otherwise = - let digit = BS.index input ix - newShift = shift + 8 - newIx = ix - 1 - newAcc = acc + fromIntegral digit `unsafeShiftL` shift - in finishBE newAcc limit newShift newIx - read64BE :: Int -> Word64 - read64BE endIx = - fromIntegral (BS.index input endIx) - .|. (fromIntegral (BS.index input (endIx - 1)) `unsafeShiftL` 8) - .|. (fromIntegral (BS.index input (endIx - 2)) `unsafeShiftL` 16) - .|. (fromIntegral (BS.index input (endIx - 3)) `unsafeShiftL` 24) - .|. (fromIntegral (BS.index input (endIx - 4)) `unsafeShiftL` 32) - .|. (fromIntegral (BS.index input (endIx - 5)) `unsafeShiftL` 40) - .|. (fromIntegral (BS.index input (endIx - 6)) `unsafeShiftL` 48) - .|. 
(fromIntegral (BS.index input (endIx - 7)) `unsafeShiftL` 56) - -endiannessArgToByteOrder :: Bool -> ByteOrder -endiannessArgToByteOrder b = if b then BigEndian else LittleEndian - -{- Note [Manual specialization] -For both integerToByteString and byteStringToInteger, we have to perform very -similar operations, but with small variations: - -- Most-significant-first versus most-significant-last (for both) -- Whether we have a size limit or not (for integerToByteString) - -Additionally, loop sectioning (see Note [Loop sectioning]) requires us to have -separate 'big-stride' and 'small-stride' operations to ensure universality of -input handling. Lastly, we have several subroutines (digit extraction, for -instance) that may vary in similar ways. In such a case, generalization by -means of abstraction seems like a good idea, as the operations (and -subroutines) vary little. - -At the same time, to determine which variation of any given function (or -subroutine) we need, we only have to scrutinize the relevant argument(s) once: -these specifics (such as byte order) don't change during the course of the -operation. Thus, we want to make sure that these checks in the code are _also_ -performed only once, ideally at the beginning. - -However, if we write such operations naively as so: - -> subroutine byteOrder arg1 arg2 = case byteOrder of -> LittleEndian -> ... -> BigEndian -> ... - -the byteOrder argument will be scrutinized on each call of subroutine. This is -correct in general (as there is no guarantee that the argument will be stable). -Strangely, however, even in a case like this one: - -> mainRoutine byteOrder arg1 arg2 = ... -> where -> subroutine arg3 = case byteOrder of -> LittleEndian -> ... -> BigEndian -> ... - -GHC _still_ re-scrutinizes byteOrder in every call of subroutine! This penalty -can be somewhat lessened using a form similar to this: - -> mainRoutine byteOrder arg1 arg2 = ... -> where -> !subroutine = case byteOrder of -> LittleEndian -> \arg3 -> ... -> BigEndian -> \arg3 -> ... - -but this is _still_ between 20 and 30% worse than doing something like this: - -> mainRoutine byteOrder arg1 arg2 = case byteOrder of -> LittleEndian -> [code calling subroutineLE where needed] -> BigEndian -> [code calling subroutineBE where needed] -> where -> subroutineLE arg3 = ... -> subroutineBE arg3 = ... - -This form _ensures_ we scrutinize (and branch) only the number of times we have -to, and in a predictable place. Since these are backends for Plutus Core primops, -and performance is thus critical, we choose to use this manually-specialized form -for each combination of relevant arguments. While this is repetitive, and thus -also somewhat error-prone, the performance penalty for not doing this is -unacceptable. --} - -{- Note [Loop sectioning] -Both integerToByteString and byteStringToInteger effectively function as loops -over digits (and thus, individual bytes), which either have to be read or -extracted. In particular, this involves trafficking data between memory and -machine registers (both ByteString and Integer are wrappers around counted -arrays), as well as the overhead of looping (involving comparisons and branches). - -However, on all architectures of interest (essentially, 64-bit Tier 1), -general-purpose registers (GPRs henceforth) are 64 bits (or 8 bytes). -Furthermore, the primary cost of moving data between memory and registers is -having to overcome the 'memory wall': the exact amount of data being moved -doesn't affect this very much. 
In addition to this, when we operate on single -bytes, the remaining 56 bits of the GPR holding that data are essentially -'wasted'. In the situation we have (namely, operating over arrays, whose data -is adjacent in memory), we thus get two sources of inefficiency: - -* Despite paying the cost for a memory transfer, we transfer only one-eighth - the data we could; and -* Despite transferring data from memory to registers, we utilize the register - at only one-eighth capacity. - -This essentially means we perform _eight times_ more rotations of the loop, -and memory moves, than we need to! - -To avoid this inefficiency, we use a technique known as _loop sectioning_. -Effectively, this turns our homogeneous loop (that always operates one byte at -a time) into a heterogeneous loop: first, we operate on a larger section (called -a _stride_) until we can no longer do this, and then we finish up using -byte-at-a-time processing. Essentially, when given an input like this: - -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -the homogeneous byte-at-a-time approach would process it like so: - - _ _ _ _ _ _ _ _ _ _ -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -for a total of 10 memory transfers and 10 loop spins, whereas a loop-sectioned -approach with a stride of 8 would instead process like so: - - ______________________________ _ _ -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -This gives us only _three_ memory transfers and _three_ loop spins instead. This -effectively reduces our work by a factor of 8. In our cases, this is almost -free, as there is no data processing to be done: all we need to do is copy -from one place to another, essentially. - -This technique only benefits us because counted arrays are cache-friendly: see -Note [Superscalarity and caching] for a longer explanation of this and why it -matters. - -Further information: - -- Tier 1 GHC platform list: - https://gitlab.haskell.org/ghc/ghc/-/wikis/platforms#tier-1-platforms -- Memory wall: - https://link.springer.com/referenceworkentry/10.1007/978-0-387-09766-4_234 -- Loop sectioning in more detail: - http://physics.ujep.cz/~zmoravec/prga/main_for/mergedProjects/optaps_for/common/optaps_vec_mine.htm --} - -{- Note [Superscalarity and caching] -On modern architectures, in order to process data, it must first be moved from -memory into a register. This operation has some cost (known as the 'memory wall'), -which is largely independent of how much data gets moved (assuming the register -can hold it): moving one byte, or a whole register's worth, costs about the same. -To reduce this cost, CPU manufacturers have introduced _cache hierarchies_, -which are designed to limit the cost of the wall, as long as the data access -matches the cache's optimal usage pattern. Thus, while an idealized view of -the memory hierarchy is this: - -Registers ---------- -Memory - -in reality, the view is more like this: - -Registers ---------- -L1 cache ---------- -L2 cache ---------- -L3 cache (on some platforms) ---------- -Memory - -Each 'higher' cache in the hierarchy is smaller, but faster, and when a memory -fetch is requested in code, in addition to moving the requested data to a -register, that data (plus some more) is moved into caches as well. The amount -of data moved into cache (a _cache line_) is typically eight machine words on -modern architectures (and is definitely the case for all Tier 1 GHC platforms): -for the cases concerning Plutus, that is 64 bytes.
Therefore, if data we need -soon after a fetch is _physically_ nearby, it won't need to be fetched from -memory: instead, it would come from a cache, which is faster (by a considerable -margin). - -To see how this can matter, consider the following ByteString: - -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -The ByteString (being a counted array) has all of its data physically adjacent -to each other. Suppose we wanted to fetch the byte at index 1 (second position). -The naive view of what happens is like this: - -Registers: [b2] [ ] [ ] .... [ ] -Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -Thus, it would appear that, if we wanted a different position's value, we would -need to fetch from memory again. However, what _actually_ happens is more like this: - -Registers: [b2] [ ] [ ] .... [ ] -L1 cache: [ b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] -Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -We note that b2, as well as its adjacent elements, were _all_ pulled into the L1 -cache. This can only work because all these elements are physically adjacent in -memory. The improvement in performance from this cache use is _very_ non-trivial: -an L1 cache is about 200 times faster than a memory access, and an L2 cache about -20 times faster. - -To take further advantage of this, modern CPUs (and all Tier 1 GHC platforms have -this capability) are _superscalar_. To explain what this means, let's consider the -naive view of how CPUs execute instructions: namely, it is one-at-a-time, and -synchronous. While CPUs must give the _appearance_ that they behave this way, in -practice, CPU execution is very much asynchronous: due to the proliferation of ALUs -on a single chip, having twice as many processing units is much cheaper than having -processing units run twice as fast. Thus, if there are no data dependencies -between instructions, CPUs can (and do!) execute them simultaneously, stalling to -await results if a data dependency is detected. This can be done automatically -using Tomasulo's algorithm, which ensures no conflicts with maximum throughput. - -Superscalarity interacts well with the cache hierarchy, as it makes data more -easily available for processing, provided there is enough 'work to do', and no -data dependencies. In our situation, most of what we do is data _movement_ from -one memory location to another, which by its very nature lacks any data -dependencies. - -Further references: - -- Numbers for cache and memory transfers: https://gist.github.com/jboner/2841832 -- Superscalarity: https://en.wikipedia.org/wiki/Superscalar_processor -- Tomasulo's algorithm: https://en.wikipedia.org/wiki/Tomasulo%27s_algorithm --} diff --git a/plutus-core/plutus-core/src/PlutusCore/Bitwise/Logical.hs b/plutus-core/plutus-core/src/PlutusCore/Bitwise/Logical.hs deleted file mode 100644 index 7e228ad80ab..00000000000 --- a/plutus-core/plutus-core/src/PlutusCore/Bitwise/Logical.hs +++ /dev/null @@ -1,464 +0,0 @@ --- editorconfig-checker-disable-file - -{-# LANGUAGE OverloadedStrings #-} - --- | Implementations of bitwise logical primops. 
-module PlutusCore.Bitwise.Logical ( - andByteString, - orByteString, - xorByteString, - complementByteString, - readBit, - writeBits, - replicateByteString - ) where - -import Control.Exception (Exception, throw, try) -import Data.Bits qualified as Bits -import Data.ByteString (ByteString) -import Data.ByteString qualified as BS -import Data.ByteString.Internal qualified as BSI -import Data.Foldable (for_, traverse_) -import Data.Text (pack) -import Data.Word (Word64, Word8) -import Foreign.Marshal.Utils (copyBytes) -import Foreign.Ptr (Ptr, castPtr, plusPtr) -import Foreign.Storable (peekByteOff, peekElemOff, pokeByteOff, pokeElemOff) -import PlutusCore.Builtin (BuiltinResult, emit) -import PlutusCore.Evaluation.Result (evaluationFailure) -import System.IO.Unsafe (unsafeDupablePerformIO) - -{- Note [Binary bitwise operation implementation and manual specialization] - - All of the 'binary' bitwise operations (namely `andByteString`, - `orByteString` and `xorByteString`) operate similarly: - - 1. Decide which of their two `ByteString` arguments determines the length - of the result. For padding semantics, this is the _longer_ argument, - whereas for truncation semantics, it's the _shorter_ one. If both - `ByteString` arguments have identical length, it doesn't matter which we - choose. - 2. Copy the choice made in step 1 into a fresh mutable buffer. - 3. Traverse over each byte of the argument _not_ chosen in step 1, and - combine each of those bytes with the byte at the corresponding index of - the fresh mutable buffer from step 2 (`.&.` for `andByteString`, - `.|.` for `orByteString`, `xor` for `xorByteString`). - - We also make use of loop sectioning to optimize this operation: see Note - [Loop sectioning], which explains why we do this. Fundamentally, this doesn't - change the logic of the operation, but means that step 3 is split into - two smaller sub-steps: we first work 8 bytes at a time, then one byte at a - time to finish up if necessary. Other than the choice of 'combining - operation', the structure of the computation is the same, which suggests that - we want a helper function with a signature like - - helper1 :: - (Word64 -> Word64 -> Word64) -> - (Word8 -> Word8 -> Word8) -> - ByteString -> - ByteString -> - Int -> - ByteString - - or possibly (to avoid duplicate argument passing) like - - helper2 :: - (forall (a :: Type) . Bits a => a -> a -> a) -> - ByteString -> - ByteString -> - Int -> - ByteString - - This would allow us to share all this logic, and have each of the 'top-level' - operations just dispatch to either of the helpers with the appropriate - function argument(s). Instead, we chose to write a manual copy of this logic - for each of the 'top-level' operations, substituting only the 'combining - operation'. - - We made this choice as any design based on either `helper1` or `helper2` is - significantly slower (at least 50% worse, and the penalty _percentage_ grows - with argument size). While `helper2` is significantly more penalizing than - `helper1`, even `helper1` reaches an almost threefold slowdown at the higher - input sizes we are interested in relative to the manual version we use here. - Due to the 'low-level' nature of Plutus Core primops, we consider these costs - unacceptable relative to the (small) benefits to code clarity and maintainability - any solution using either style of helper would provide.
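For concreteness, here is a hedged sketch of roughly what the rejected
`helper1`-based design would have looked like; `helper1` and `andByteString'`
are hypothetical names, and the loop body is elided, as it would match the
copy-then-traverse structure of the real operations below.

> helper1 ::
>   (Word64 -> Word64 -> Word64) ->
>   (Word8 -> Word8 -> Word8) ->
>   ByteString ->
>   ByteString ->
>   Int ->
>   ByteString
> helper1 combine64 combine8 toCopy toTraverse traverseLen =
>   ... -- copy toCopy into a fresh buffer, then combine: eight bytes at
>       -- a time using combine64, then byte-at-a-time using combine8
>
> andByteString' :: Bool -> ByteString -> ByteString -> ByteString
> andByteString' shouldPad bs1 bs2 =
>   let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1)
>       (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer)
>    in helper1 (Bits..&.) (Bits..&.) toCopy toTraverse (BS.length shorter)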
- - The reason for `helper2` under-performing is unsurprising: any argument whose - type is rank-2 polymorphic with a dictionary constraint essentially acts as - a 'program template', which gets interpreted at runtime given some dictionary - for a `Bits` instance. GHC can do practically nothing to optimize this, as - there is no way to tell, for any given argument, _which_ definitions of an - instance would be required here, even if the set of operations we use is - finite, since any instance can make use of the full power of Haskell, which - essentially lands us in Rice's Theorem territory. For `helper1`, the reasons - are similar: it _must_ be able to work regardless of what functions (assuming - appropriate types) it is given, which means in general, GHC is forced to - compile mother-may-I-style code involving pointer-chasing through those arguments - at runtime. This explains why the 'blowup' becomes worse with argument length. - - While in theory inlining could help with at least the `helper1` case - (`helper2` is beyond that technique), it doesn't seem like GHC is able to - figure this out, even when `INLINE` is placed on `helper1`. - -} - --- | Bitwise logical AND, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md). -{-# INLINEABLE andByteString #-} -andByteString :: Bool -> ByteString -> ByteString -> ByteString -andByteString shouldPad bs1 bs2 = - let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1) - (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer) - in go toCopy toTraverse (BS.length shorter) - where - go :: ByteString -> ByteString -> Int -> ByteString - go toCopy toTraverse traverseLen = - unsafeDupablePerformIO . BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) -> - BS.useAsCString toTraverse $ \traversePtr -> do - BSI.create copyLen $ \dstPtr -> do - copyBytes dstPtr (castPtr copyPtr) copyLen - let (bigStrides, littleStrides) = traverseLen `quotRem` 8 - let offset = bigStrides * 8 - let bigDstPtr :: Ptr Word64 = castPtr dstPtr - let bigTraversePtr :: Ptr Word64 = castPtr traversePtr - for_ [0 .. bigStrides - 1] $ \i -> do - w64_1 <- peekElemOff bigDstPtr i - w64_2 <- peekElemOff bigTraversePtr i - pokeElemOff bigDstPtr i $ w64_1 Bits..&. w64_2 - let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset - let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset - for_ [0 .. littleStrides - 1] $ \i -> do - w8_1 <- peekElemOff smallDstPtr i - w8_2 <- peekElemOff smallTraversePtr i - pokeElemOff smallDstPtr i $ w8_1 Bits..&. w8_2 - --- | Bitwise logical OR, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md). -{-# INLINEABLE orByteString #-} -orByteString :: Bool -> ByteString -> ByteString -> ByteString -orByteString shouldPad bs1 bs2 = - let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1) - (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer) - in go toCopy toTraverse (BS.length shorter) - where - go :: ByteString -> ByteString -> Int -> ByteString - go toCopy toTraverse traverseLen = - unsafeDupablePerformIO . 
BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) -> - BS.useAsCString toTraverse $ \traversePtr -> do - BSI.create copyLen $ \dstPtr -> do - copyBytes dstPtr (castPtr copyPtr) copyLen - let (bigStrides, littleStrides) = traverseLen `quotRem` 8 - let offset = bigStrides * 8 - let bigDstPtr :: Ptr Word64 = castPtr dstPtr - let bigTraversePtr :: Ptr Word64 = castPtr traversePtr - for_ [0 .. bigStrides - 1] $ \i -> do - w64_1 <- peekElemOff bigDstPtr i - w64_2 <- peekElemOff bigTraversePtr i - pokeElemOff bigDstPtr i $ w64_1 Bits..|. w64_2 - let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset - let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset - for_ [0 .. littleStrides - 1] $ \i -> do - w8_1 <- peekElemOff smallDstPtr i - w8_2 <- peekElemOff smallTraversePtr i - pokeElemOff smallDstPtr i $ w8_1 Bits..|. w8_2 - --- | Bitwise logical XOR, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md). -{-# INLINEABLE xorByteString #-} -xorByteString :: Bool -> ByteString -> ByteString -> ByteString -xorByteString shouldPad bs1 bs2 = - let (shorter, longer) = if BS.length bs1 < BS.length bs2 then (bs1, bs2) else (bs2, bs1) - (toCopy, toTraverse) = if shouldPad then (longer, shorter) else (shorter, longer) - in go toCopy toTraverse (BS.length shorter) - where - go :: ByteString -> ByteString -> Int -> ByteString - go toCopy toTraverse traverseLen = - unsafeDupablePerformIO . BS.useAsCStringLen toCopy $ \(copyPtr, copyLen) -> - BS.useAsCString toTraverse $ \traversePtr -> do - BSI.create copyLen $ \dstPtr -> do - copyBytes dstPtr (castPtr copyPtr) copyLen - let (bigStrides, littleStrides) = traverseLen `quotRem` 8 - let offset = bigStrides * 8 - let bigDstPtr :: Ptr Word64 = castPtr dstPtr - let bigTraversePtr :: Ptr Word64 = castPtr traversePtr - for_ [0 .. bigStrides - 1] $ \i -> do - w64_1 <- peekElemOff bigDstPtr i - w64_2 <- peekElemOff bigTraversePtr i - pokeElemOff bigDstPtr i $ Bits.xor w64_1 w64_2 - let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset - let smallTraversePtr :: Ptr Word8 = plusPtr traversePtr offset - for_ [0 .. littleStrides - 1] $ \i -> do - w8_1 <- peekElemOff smallDstPtr i - w8_2 <- peekElemOff smallTraversePtr i - pokeElemOff smallDstPtr i $ Bits.xor w8_1 w8_2 - --- | Bitwise logical complement, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md). -{-# INLINEABLE complementByteString #-} -complementByteString :: ByteString -> ByteString -complementByteString bs = unsafeDupablePerformIO . BS.useAsCStringLen bs $ \(srcPtr, len) -> do - -- We use loop sectioning here; see Note [Loop sectioning] as to why we do this - let (bigStrides, littleStrides) = len `quotRem` 8 - let offset = bigStrides * 8 - BSI.create len $ \dstPtr -> do - let bigSrcPtr :: Ptr Word64 = castPtr srcPtr - let bigDstPtr :: Ptr Word64 = castPtr dstPtr - for_ [0 .. bigStrides - 1] $ \i -> do - w64 <- peekElemOff bigSrcPtr i - pokeElemOff bigDstPtr i . Bits.complement $ w64 - let smallSrcPtr :: Ptr Word8 = plusPtr srcPtr offset - let smallDstPtr :: Ptr Word8 = plusPtr dstPtr offset - for_ [0 .. littleStrides - 1] $ \i -> do - w8 <- peekElemOff smallSrcPtr i - pokeElemOff smallDstPtr i . Bits.complement $ w8 - --- | Bit read at index, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md) -{-# INLINEABLE readBit #-} -readBit :: ByteString -> Int -> BuiltinResult Bool -readBit bs ix - | ix < 0 = do - emit "readBit: index out of bounds" - emit $ "Index: " <> (pack . 
show $ ix) - evaluationFailure - | ix >= len * 8 = do - emit "readBit: index out of bounds" - emit $ "Index: " <> (pack . show $ ix) - evaluationFailure - | otherwise = do - let (bigIx, littleIx) = ix `quotRem` 8 - let flipIx = len - bigIx - 1 - pure $ Bits.testBit (BS.index bs flipIx) littleIx - where - len :: Int - len = BS.length bs - --- | Bulk bit write, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md) -{-# INLINEABLE writeBits #-} -writeBits :: ByteString -> [(Integer, Bool)] -> BuiltinResult ByteString -writeBits bs changelist = case unsafeDupablePerformIO . try $ go of - Left (WriteBitsException i) -> do - emit "writeBits: index out of bounds" - emit $ "Index: " <> (pack . show $ i) - evaluationFailure - Right result -> pure result - where - -- This is written in a somewhat strange way. See Note [writeBits and - -- exceptions], which covers why we did this. - go :: IO ByteString - go = BS.useAsCString bs $ \srcPtr -> - BSI.create len $ \dstPtr -> do - copyBytes dstPtr (castPtr srcPtr) len - traverse_ (setAtIx dstPtr) changelist - len :: Int - len = BS.length bs - bitLen :: Integer - bitLen = fromIntegral len * 8 - setAtIx :: Ptr Word8 -> (Integer, Bool) -> IO () - setAtIx ptr (i, b) - | i < 0 = throw $ WriteBitsException i - | i >= bitLen = throw $ WriteBitsException i - | otherwise = do - let (bigIx, littleIx) = i `quotRem` 8 - let flipIx = len - fromIntegral bigIx - 1 - w8 :: Word8 <- peekByteOff ptr flipIx - let toWrite = if b - then Bits.setBit w8 . fromIntegral $ littleIx - else Bits.clearBit w8 . fromIntegral $ littleIx - pokeByteOff ptr flipIx toWrite - --- | Byte replication, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md) -replicateByteString :: Int -> Word8 -> BuiltinResult ByteString -replicateByteString len w8 - | len < 0 = do - emit "byteStringReplicate: negative length requested" - evaluationFailure - | otherwise = pure . BS.replicate len $ w8 - --- Helpers - -{- Note [writeBits and exceptions] - - As `writeBits` allows us to pass a changelist argument of any length, we - could potentially have an out-of-bounds index anywhere in the list. As we - have to fail on such cases (and report them appropriately), we end up needing - _both_ IO (to do mutable things) _and_ a way to signal errors. We can - do this in two ways: - - 1. Pre-scan the changelist for any out-of-bounds indexes, fail if we see any, - then apply the necessary changes if no out-of-bounds indexes are found. - 2. Speculatively allocate the new `ByteString`, then do the changes in the - changelist argument one at a time, failing as soon as we see an out-of-bounds - index. - - Option 1 would require traversing the changelist argument twice, which is - undesirable; thus, option 2 is the more efficient choice. The - natural choice for option 2 would be something similar to `ExceptT Int IO` - (with the `Int` being an out-of-bounds index). However, we aren't able to do - this, as ultimately, `ByteString`s are implemented as `ForeignPtr`s, forcing - us to use the following function to interact with them, directly or not: - - withForeignPtr :: forall (a :: Type) . ForeignPtr a -> (Ptr a -> IO b) -> IO b - - Notably, the function argument produces a result of `IO b`, whereas we would - need `MonadIO m => m b` instead. This means that our _only_ choice is to - use the exception mechanism, either directly or via some wrappers like - `MonadUnliftIO`.
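To illustrate the pattern in miniature, here is a hedged, self-contained
sketch using the same imports as this module; `BadIndex` and `zeroBytesAt` are
hypothetical names, not the `writeBits` code above. The exception is thrown
from inside the `BSI.create` callback, and `try` converts it back into a pure
value at the boundary.

> data BadIndex = BadIndex Integer
>   deriving stock (Eq, Show)
>
> instance Exception BadIndex
>
> -- Zero out the bytes at the given indexes, failing on any bad index.
> zeroBytesAt :: ByteString -> [Integer] -> Either BadIndex ByteString
> zeroBytesAt bs ixes = unsafeDupablePerformIO . try $
>   BS.useAsCString bs $ \srcPtr ->
>     BSI.create len $ \dstPtr -> do
>       copyBytes dstPtr (castPtr srcPtr) len
>       for_ ixes $ \i ->
>         if i < 0 || i >= fromIntegral len
>           then throw (BadIndex i)
>           else pokeByteOff dstPtr (fromIntegral i) (0x00 :: Word8)
>   where
>     len = BS.length bs

In `writeBits` itself, the same shape appears, with `WriteBitsException`
playing the role of `BadIndex`.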
While this is unusual, and arguably against the spirit of - the use of `IO` relative to `ByteString` construction, we don't have any other - choice. We decided to use the exception mechanism directly, as while - `MonadUnliftIO` is a bit cleaner, it ultimately ends up doing the same thing - anyway, and this method at least makes it clear what we're doing. - - This doesn't pose any problems from the point of view of Plutus Core, as this - exception cannot 'leak'; we handle it entirely within `writeBits`, and no - other Plutus Core code can ever see it. --} -newtype WriteBitsException = WriteBitsException Integer - deriving stock (Eq, Show) - -instance Exception WriteBitsException - -{- Note [Loop sectioning] - -Several operations in this module effectively function as loops over bytes, -which have to be read, written, or both. Furthermore, we usually need to -process these bytes somehow, typically using fixed-width bitwise operations -from the Haskell side, thus allowing us to 'translate' these same operations -to the variable-width `ByteString` arguments we are dealing with. This involves -significant trafficking of data between memory and machine registers (as -`ByteString`s are wrapped counted arrays), as well as the overheads of looping -(involving comparisons and branches). This trafficking is necessary not only -to move the memory around, but also to process it, as on modern architectures, -data must first be moved into a register in order to do anything with it. - -On all architectures of interest (essentially, 64-bit Tier 1), general-purpose -registers (GPRs henceforth) are 64 bits (or 8 bytes) wide. Furthermore, the -primary cost of moving data between memory and registers is having to overcome -the 'memory wall': the exact amount of data being moved doesn't affect this -much. In addition to this, when we operate on single bytes, the remaining 56 -bits of the GPR holding that data are essentially 'wasted'. In the situation -we are in (namely, operating over arrays, whose data is adjacent in memory), -we thus get two sources of inefficiency: - -* Despite paying the cost for a memory transfer, we move only one-eighth of - the data we could; and -* Despite transferring data from memory to registers, we use these registers - only at one-eighth capacity. - -In short, we do _eight times_ more rotations of the loop, and memory moves, -than we need to! - -To avoid this, we use a technique called _loop sectioning_. Effectively, this -transforms our homogeneous loop (that always works one byte at a time) into a -heterogeneous loop: first, we operate on a larger section (called a _stride_) -until we can no longer do this, and then we finish up using byte-at-a-time -processing. Essentially, given an input like this: - -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -the homogeneous byte-at-a-time approach would process it like so: - - _ _ _ _ _ _ _ _ _ _ -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -for a total of 10 memory transfers and 10 loop spins, whereas a loop-sectioned -approach with a stride of 8 would instead process like so: - - ______________________________ _ _ -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10 ] - -This gives us only _three_ memory transfers and _three_ loop spins instead. This -effectively reduces our work by a factor of 8. In our cases, this is significant. - -This technique only benefits us because counted arrays are cache-friendly: see -Note [Superscalarity and caching] for a longer explanation of this and why it -matters.
- -Further information: - -- Tier 1 GHC platform list: - https://gitlab.haskell.org/ghc/ghc/-/wikis/platforms#tier-1-platforms -- Memory wall: - https://link.springer.com/referenceworkentry/10.1007/978-0-387-09766-4_234 -- Loop sectioning in more detail: - http://physics.ujep.cz/~zmoravec/prga/main_for/mergedProjects/optaps_for/common/optaps_vec_mine.htm --} - -{- Note [Superscalarity and caching] -On modern architectures, in order to process data, it must first be moved from -memory into a register. This operation has some cost (known as the 'memory wall'), -which is largely independent of how much data gets moved (assuming the register -can hold it): moving one byte, or a whole register's worth, costs about the same. -To reduce this cost, CPU manufacturers have introduced _cache hierarchies_, -which are designed to limit the cost of the wall, as long as the data access -matches the cache's optimal usage pattern. Thus, while an idealized view of -the memory hierarchy is this: - -Registers ---------- -Memory - -in reality, the view is more like this: - -Registers ---------- -L1 cache ---------- -L2 cache ---------- -L3 cache (on some platforms) ---------- -Memory - -Each 'higher' cache in the hierarchy is smaller, but faster, and when a memory -fetch is requested in code, in addition to moving the requested data to a -register, that data (plus some more) is moved into caches as well. The amount -of data moved into cache (a _cache line_) is typically eight machine words on -modern architectures (and is definitely the case for all Tier 1 GHC platforms): -for the cases concerning Plutus, that is 64 bytes. Therefore, if data we need -soon after a fetch is _physically_ nearby, it won't need to be fetched from -memory: instead, it would come from a cache, which is faster (by a considerable -margin). - -To see how this can matter, consider the following ByteString: - -[ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -The ByteString (being a counted array) has all of its data physically adjacent -to each other. Suppose we wanted to fetch the byte at index 1 (second position). -The naive view of what happens is like this: - -Registers: [b2] [ ] [ ] .... [ ] -Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -Thus, it would appear that, if we wanted a different position's value, we would -need to fetch from memory again. However, what _actually_ happens is more like this: - -Registers: [b2] [ ] [ ] .... [ ] -L1 cache: [ b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] -Memory: [ b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11 ] - -We note that b2, as well as its adjacent elements, were _all_ pulled into the L1 -cache. This can only work because all these elements are physically adjacent in -memory. The improvement in performance from this cache use is _very_ non-trivial: -an L1 cache is about 200 times faster than a memory access, and an L2 cache about -20 times faster. - -To take further advantage of this, modern CPUs (and all Tier 1 GHC platforms have -this capability) are _superscalar_. To explain what this means, let's consider the -naive view of how CPUs execute instructions: namely, it is one-at-a-time, and -synchronous. While CPUs must give the _appearance_ that they behave this way, in -practice, CPU execution is very much asynchronous: due to the proliferation of ALUs -on a single chip, having twice as many processing units is much cheaper than having -processing units run twice as fast. Thus, if there are no data dependencies -between instructions, CPUs can (and do!)
execute them simultaneously, stalling to -await results if a data dependency is detected. This can be done automatically -using Tomasulo's algorithm, which ensures no conflicts with maximum throughput. - -Superscalarity interacts well with the cache hierarchy, as it makes data more -easily available for processing, provided there is enough 'work to do', and no -data dependencies. In our situation, most of what we do is data _movement_ from -one memory location to another, which by its very nature lacks any data -dependencies. - -Further references: - -- Numbers for cache and memory transfers: https://gist.github.com/jboner/2841832 -- Superscalarity: https://en.wikipedia.org/wiki/Superscalar_processor -- Tomasulo's algorithm: https://en.wikipedia.org/wiki/Tomasulo%27s_algorithm --} diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs index a34d129237f..ecc6bc4f5f0 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs @@ -7,7 +7,6 @@ {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE UndecidableInstances #-} @@ -26,8 +25,7 @@ import PlutusCore.Evaluation.Machine.ExMemoryUsage (ExMemoryUsage, LiteralByteSi import PlutusCore.Evaluation.Result (EvaluationResult (..)) import PlutusCore.Pretty (PrettyConfigPlc) -import PlutusCore.Bitwise.Convert as Convert -import PlutusCore.Bitwise.Logical as Logical +import PlutusCore.Bitwise qualified as Bitwise import PlutusCore.Crypto.BLS12_381.G1 qualified as BLS12_381.G1 import PlutusCore.Crypto.BLS12_381.G2 qualified as BLS12_381.G2 import PlutusCore.Crypto.BLS12_381.Pairing qualified as BLS12_381.Pairing @@ -160,7 +158,12 @@ data DefaultFun | ComplementByteString | ReadBit | WriteBits - | ReplicateByteString + | ReplicateByte + -- Bitwise + | ShiftByteString + | RotateByteString + | CountSetBits + | FindFirstSetBit deriving stock (Show, Eq, Ord, Enum, Bounded, Generic, Ix) deriving anyclass (NFData, Hashable, PrettyBy PrettyConfigPlc) @@ -1817,7 +1820,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where let integerToByteStringDenotation :: Bool -> LiteralByteSize -> Integer -> BuiltinResult BS.ByteString {- The second argument is wrapped in a LiteralByteSize to allow us to interpret it as a size during costing. It appears as an integer in UPLC: see Note [Integral types as Integer]. 
-} - integerToByteStringDenotation b (LiteralByteSize w) n = integerToByteStringWrapper b w n + integerToByteStringDenotation b (LiteralByteSize w) = Bitwise.integerToByteStringWrapper b w {-# INLINE integerToByteStringDenotation #-} in makeBuiltinMeaning integerToByteStringDenotation @@ -1825,7 +1828,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar ByteStringToInteger = let byteStringToIntegerDenotation :: Bool -> BS.ByteString -> Integer - byteStringToIntegerDenotation = byteStringToIntegerWrapper + byteStringToIntegerDenotation = Bitwise.byteStringToIntegerWrapper {-# INLINE byteStringToIntegerDenotation #-} in makeBuiltinMeaning byteStringToIntegerDenotation @@ -1834,7 +1837,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where -- Logical toBuiltinMeaning _semvar AndByteString = let andByteStringDenotation :: Bool -> BS.ByteString -> BS.ByteString -> BS.ByteString - andByteStringDenotation = Logical.andByteString + andByteStringDenotation = Bitwise.andByteString {-# INLINE andByteStringDenotation #-} in makeBuiltinMeaning andByteStringDenotation @@ -1842,7 +1845,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar OrByteString = let orByteStringDenotation :: Bool -> BS.ByteString -> BS.ByteString -> BS.ByteString - orByteStringDenotation = Logical.orByteString + orByteStringDenotation = Bitwise.orByteString {-# INLINE orByteStringDenotation #-} in makeBuiltinMeaning orByteStringDenotation @@ -1850,7 +1853,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar XorByteString = let xorByteStringDenotation :: Bool -> BS.ByteString -> BS.ByteString -> BS.ByteString - xorByteStringDenotation = Logical.xorByteString + xorByteStringDenotation = Bitwise.xorByteString {-# INLINE xorByteStringDenotation #-} in makeBuiltinMeaning xorByteStringDenotation @@ -1858,7 +1861,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar ComplementByteString = let complementByteStringDenotation :: BS.ByteString -> BS.ByteString - complementByteStringDenotation = Logical.complementByteString + complementByteStringDenotation = Bitwise.complementByteString {-# INLINE complementByteStringDenotation #-} in makeBuiltinMeaning complementByteStringDenotation @@ -1866,7 +1869,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar ReadBit = let readBitDenotation :: BS.ByteString -> Int -> BuiltinResult Bool - readBitDenotation = Logical.readBit + readBitDenotation = Bitwise.readBit {-# INLINE readBitDenotation #-} in makeBuiltinMeaning readBitDenotation @@ -1874,20 +1877,54 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where toBuiltinMeaning _semvar WriteBits = let writeBitsDenotation :: BS.ByteString -> [(Integer, Bool)] -> BuiltinResult BS.ByteString - writeBitsDenotation = Logical.writeBits + writeBitsDenotation = Bitwise.writeBits {-# INLINE writeBitsDenotation #-} in makeBuiltinMeaning writeBitsDenotation (runCostingFunTwoArguments . 
unimplementedCostingFun) - toBuiltinMeaning _semvar ReplicateByteString = - let byteStringReplicateDenotation :: Int -> Word8 -> BuiltinResult BS.ByteString - byteStringReplicateDenotation = Logical.replicateByteString - {-# INLINE byteStringReplicateDenotation #-} + toBuiltinMeaning _semvar ReplicateByte = + let replicateByteDenotation :: Int -> Word8 -> BuiltinResult BS.ByteString + replicateByteDenotation = Bitwise.replicateByte + {-# INLINE replicateByteDenotation #-} in makeBuiltinMeaning - byteStringReplicateDenotation + replicateByteDenotation (runCostingFunTwoArguments . unimplementedCostingFun) + -- Bitwise + + toBuiltinMeaning _semvar ShiftByteString = + let shiftByteStringDenotation :: BS.ByteString -> Int -> BS.ByteString + shiftByteStringDenotation = Bitwise.shiftByteString + {-# INLINE shiftByteStringDenotation #-} + in makeBuiltinMeaning + shiftByteStringDenotation + (runCostingFunTwoArguments . unimplementedCostingFun) + + toBuiltinMeaning _semvar RotateByteString = + let rotateByteStringDenotation :: BS.ByteString -> Int -> BS.ByteString + rotateByteStringDenotation = Bitwise.rotateByteString + {-# INLINE rotateByteStringDenotation #-} + in makeBuiltinMeaning + rotateByteStringDenotation + (runCostingFunTwoArguments . unimplementedCostingFun) + + toBuiltinMeaning _semvar CountSetBits = + let countSetBitsDenotation :: BS.ByteString -> Int + countSetBitsDenotation = Bitwise.countSetBits + {-# INLINE countSetBitsDenotation #-} + in makeBuiltinMeaning + countSetBitsDenotation + (runCostingFunOneArgument . unimplementedCostingFun) + + toBuiltinMeaning _semvar FindFirstSetBit = + let findFirstSetBitDenotation :: BS.ByteString -> Int + findFirstSetBitDenotation = Bitwise.findFirstSetBit + {-# INLINE findFirstSetBitDenotation #-} + in makeBuiltinMeaning + findFirstSetBitDenotation + (runCostingFunOneArgument . unimplementedCostingFun) + -- See Note [Inlining meanings of builtins]. 
{-# INLINE toBuiltinMeaning #-} @@ -2021,7 +2058,12 @@ instance Flat DefaultFun where ComplementByteString -> 78 ReadBit -> 79 WriteBits -> 80 - ReplicateByteString -> 81 + ReplicateByte -> 81 + + ShiftByteString -> 82 + RotateByteString -> 83 + CountSetBits -> 84 + FindFirstSetBit -> 85 decode = go =<< decodeBuiltin where go 0 = pure AddInteger @@ -2105,7 +2147,11 @@ instance Flat DefaultFun where go 78 = pure ComplementByteString go 79 = pure ReadBit go 80 = pure WriteBits - go 81 = pure ReplicateByteString + go 81 = pure ReplicateByte + go 82 = pure ShiftByteString + go 83 = pure RotateByteString + go 84 = pure CountSetBits + go 85 = pure FindFirstSetBit go t = fail $ "Failed to decode builtin tag, got: " ++ show t size _ n = n + builtinTagWidth diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/CountSetBits.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/CountSetBits.plc.golden new file mode 100644 index 00000000000..aa49a117436 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/CountSetBits.plc.golden @@ -0,0 +1 @@ +bytestring -> integer \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/FindFirstSetBit.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/FindFirstSetBit.plc.golden new file mode 100644 index 00000000000..aa49a117436 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/FindFirstSetBit.plc.golden @@ -0,0 +1 @@ +bytestring -> integer \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ReplicateByte.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ReplicateByte.plc.golden new file mode 100644 index 00000000000..fcb192a96ed --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ReplicateByte.plc.golden @@ -0,0 +1 @@ +integer -> integer -> bytestring \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/RotateByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/RotateByteString.plc.golden new file mode 100644 index 00000000000..fbda7bdf852 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/RotateByteString.plc.golden @@ -0,0 +1 @@ +bytestring -> integer -> bytestring \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ShiftByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ShiftByteString.plc.golden new file mode 100644 index 00000000000..fbda7bdf852 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/DefaultFun/ShiftByteString.plc.golden @@ -0,0 +1 @@ +bytestring -> integer -> bytestring \ No newline at end of file diff --git a/plutus-core/plutus-ir/src/PlutusIR/Transform/RewriteRules/CommuteFnWithConst.hs b/plutus-core/plutus-ir/src/PlutusIR/Transform/RewriteRules/CommuteFnWithConst.hs index 4db5179eb6b..0424128e68f 100644 --- a/plutus-core/plutus-ir/src/PlutusIR/Transform/RewriteRules/CommuteFnWithConst.hs +++ b/plutus-core/plutus-ir/src/PlutusIR/Transform/RewriteRules/CommuteFnWithConst.hs @@ -130,11 +130,15 @@ isCommutative = \case IntegerToByteString -> False ByteStringToInteger -> False -- Currently, this requires commutativity in all arguments, which the - -- logical operations are not. + -- logical and bitwise operations are not. 
AndByteString -> False OrByteString -> False XorByteString -> False ComplementByteString -> False ReadBit -> False WriteBits -> False - ReplicateByteString -> False + ReplicateByte -> False + ShiftByteString -> False + RotateByteString -> False + CountSetBits -> False + FindFirstSetBit -> False diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs new file mode 100644 index 00000000000..55341b27a93 --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs @@ -0,0 +1,446 @@ +-- editorconfig-checker-disable-file + +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} + +-- | Tests for [this +-- CIP](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-XXXX/CIP-XXXX.md) +module Evaluation.Builtins.Bitwise ( + shiftHomomorphism, + rotateHomomorphism, + csbHomomorphism, + shiftClear, + rotateRollover, + csbRotate, + shiftPosClearLow, + shiftNegClearHigh, + rotateMoveBits, + csbComplement, + csbInclusionExclusion, + csbXor, + ffsReplicate, + ffsXor, + ffsIndex, + ffsZero + ) where + +import Control.Monad (unless) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Evaluation.Helpers (assertEvaluatesToConstant, evaluateTheSame, evaluateToHaskell, + evaluatesToConstant, forAllByteString, forAllByteStringThat) +import Hedgehog (Property, forAll, property) +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range +import PlutusCore qualified as PLC +import PlutusCore.MkPlc (builtin, mkConstant, mkIterAppNoAnn) +import Test.Tasty (TestTree) +import Test.Tasty.Hedgehog (testPropertyNamed) +import Test.Tasty.HUnit (testCase) + +-- | Finding the first set bit in a bytestring with only zero bytes should always give -1. +ffsZero :: Property +ffsZero = property $ do + len <- forAll . Gen.integral . Range.linear 0 $ 512 + let bs = BS.replicate len 0x00 + let rhs = mkIterAppNoAnn (builtin () PLC.FindFirstSetBit) [ + mkConstant @ByteString () bs + ] + evaluatesToConstant @Integer (negate 1) rhs + +-- | If we find a valid index for the first set bit, then: +-- +-- 1. The specified index should have a set bit; and +-- 2. Any valid smaller index should have a clear bit. +-- +-- We 'hack' the generator slightly here to ensure we don't end up with all-zeroes (or the empty +-- bytestring), as otherwise, the test wouldn't be meaningful. +ffsIndex :: Property +ffsIndex = property $ do + bs <- forAllByteStringThat (BS.any (/= 0x00)) 0 512 + let ffsExp = mkIterAppNoAnn (builtin () PLC.FindFirstSetBit) [ + mkConstant @ByteString () bs + ] + ix <- evaluateToHaskell ffsExp + let hitIxExp = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + mkConstant @ByteString () bs, + mkConstant @Integer () ix + ] + evaluatesToConstant True hitIxExp + unless (ix == 0) $ do + i <- forAll . Gen.integral . Range.linear 0 $ ix - 1 + let missIxExp = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + evaluatesToConstant False missIxExp + +-- | For any choice of bytestring, if we XOR it with itself, there should be no set bits; that is, +-- finding the first set bit should give @-1@. 
+ffsXor :: Property
+ffsXor = property $ do
+  bs <- forAllByteString 0 512
+  semantics <- forAll Gen.bool
+  let rhsInner = mkIterAppNoAnn (builtin () PLC.XorByteString) [
+        mkConstant @Bool () semantics,
+        mkConstant @ByteString () bs,
+        mkConstant @ByteString () bs
+        ]
+  let rhs = mkIterAppNoAnn (builtin () PLC.FindFirstSetBit) [
+        rhsInner
+        ]
+  evaluatesToConstant @Integer (negate 1) rhs
+
+-- | If we replicate any byte any (positive) number of times, the first set bit should be the same
+-- as in the case where we replicated it exactly once.
+ffsReplicate :: Property
+ffsReplicate = property $ do
+  n <- forAll . Gen.integral . Range.linear 1 $ 512
+  w8 <- forAll . Gen.integral . Range.linear 0 $ 255
+  let lhsInner = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [
+        mkConstant @Integer () n,
+        mkConstant @Integer () w8
+        ]
+  let lhs = mkIterAppNoAnn (builtin () PLC.FindFirstSetBit) [
+        lhsInner
+        ]
+  let rhsInner = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [
+        mkConstant @Integer () 1,
+        mkConstant @Integer () w8
+        ]
+  let rhs = mkIterAppNoAnn (builtin () PLC.FindFirstSetBit) [
+        rhsInner
+        ]
+  evaluateTheSame lhs rhs
+
+-- | For any bytestring with bit length @n@ and @k@ set bits, its complement should have
+-- @n - k@ set bits.
+csbComplement :: Property
+csbComplement = property $ do
+  bs <- forAllByteString 0 512
+  let bitLen = BS.length bs * 8
+  let lhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [
+        mkConstant @ByteString () bs
+        ]
+  let rhsComplement = mkIterAppNoAnn (builtin () PLC.ComplementByteString) [
+        mkConstant @ByteString () bs
+        ]
+  let rhsCount = mkIterAppNoAnn (builtin () PLC.CountSetBits) [
+        rhsComplement
+        ]
+  let rhs = mkIterAppNoAnn (builtin () PLC.SubtractInteger) [
+        mkConstant @Integer () (fromIntegral bitLen),
+        rhsCount
+        ]
+  evaluateTheSame lhs rhs
+
+-- | The inclusion-exclusion principle: specifically, for any @x@ and @y@, the number of set bits in
+-- @x XOR y@ should be the number of set bits in @x OR y@ minus the number of set bits in @x AND y@.
+csbInclusionExclusion :: Property
+csbInclusionExclusion = property $ do
+  x <- forAllByteString 0 512
+  y <- forAllByteString 0 512
+  let lhsInner = mkIterAppNoAnn (builtin () PLC.XorByteString) [
+        mkConstant @Bool () False,
+        mkConstant @ByteString () x,
+        mkConstant @ByteString () y
+        ]
+  let lhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [
+        lhsInner
+        ]
+  let rhsOr = mkIterAppNoAnn (builtin () PLC.OrByteString) [
+        mkConstant @Bool () False,
+        mkConstant @ByteString () x,
+        mkConstant @ByteString () y
+        ]
+  let rhsAnd = mkIterAppNoAnn (builtin () PLC.AndByteString) [
+        mkConstant @Bool () False,
+        mkConstant @ByteString () x,
+        mkConstant @ByteString () y
+        ]
+  let rhsCountOr = mkIterAppNoAnn (builtin () PLC.CountSetBits) [
+        rhsOr
+        ]
+  let rhsCountAnd = mkIterAppNoAnn (builtin () PLC.CountSetBits) [
+        rhsAnd
+        ]
+  let rhs = mkIterAppNoAnn (builtin () PLC.SubtractInteger) [
+        rhsCountOr,
+        rhsCountAnd
+        ]
+  evaluateTheSame lhs rhs
+
+-- | For any bytestring @x@, the number of set bits in @x XOR x@ should be 0.
+csbXor :: Property +csbXor = property $ do + bs <- forAllByteString 0 512 + semantics <- forAll Gen.bool + let rhsInner = mkIterAppNoAnn (builtin () PLC.XorByteString) [ + mkConstant @Bool () semantics, + mkConstant @ByteString () bs, + mkConstant @ByteString () bs + ] + let rhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + rhsInner + ] + evaluatesToConstant @Integer 0 rhs + +-- | There should exist a monoid homomorphism between natural number addition and function composition for +-- shifts over a fixed bytestring argument. +shiftHomomorphism :: [TestTree] +shiftHomomorphism = [ + testPropertyNamed "zero shift is identity" "zero_shift_id" idProp, + -- Because the homomorphism on shifts is more restrictive than on rotations (namely, it is for + -- naturals and their negative equivalents, not integers), we separate the composition property + -- into two: one dealing with non-negative, the other with non-positive. This helps a bit with + -- coverage, as otherwise, we wouldn't necessarily cover both paths equally well, as we'd have to + -- either discard mismatched signs (which are likely) or 'hack them in-place', which would skew + -- distributions. + testPropertyNamed "non-negative addition of shifts is composition" "plus_shift_pos_comp" plusCompProp, + testPropertyNamed "non-positive addition of shifts is composition" "plus_shift_neg_comp" minusCompProp + ] + where + idProp :: Property + idProp = property $ do + bs <- forAllByteString 0 512 + let lhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () 0 + ] + evaluatesToConstant bs lhs + plusCompProp :: Property + plusCompProp = property $ do + bs <- forAllByteString 0 512 + i <- forAll . Gen.integral . Range.linear 0 $ 512 + j <- forAll . Gen.integral . Range.linear 0 $ 512 + let lhsInner = mkIterAppNoAnn (builtin () PLC.AddInteger) [ + mkConstant @Integer () i, + mkConstant @Integer () j + ] + let lhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + lhsInner + ] + let rhsInner = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + let rhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + rhsInner, + mkConstant @Integer () j + ] + evaluateTheSame lhs rhs + minusCompProp :: Property + minusCompProp = property $ do + bs <- forAllByteString 0 512 + i <- forAll . Gen.integral . Range.linear 0 $ negate 512 + j <- forAll . Gen.integral . Range.linear 0 $ negate 512 + let lhsInner = mkIterAppNoAnn (builtin () PLC.AddInteger) [ + mkConstant @Integer () i, + mkConstant @Integer () j + ] + let lhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + lhsInner + ] + let rhsInner = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + let rhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + rhsInner, + mkConstant @Integer () j + ] + evaluateTheSame lhs rhs + +-- | There should exist a monoid homomorphism between integer addition and function composition for +-- rotations over a fixed bytestring argument. 
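+-- That is, rotating by @0@ should be the identity, and rotating by @i + j@ should be the same as
+-- rotating by @i@, then by @j@.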
+rotateHomomorphism :: [TestTree] +rotateHomomorphism = [ + testPropertyNamed "zero rotation is identity" "zero_rotate_id" idProp, + testPropertyNamed "addition of rotations is composition" "plus_rotate_comp" compProp + ] + where + idProp :: Property + idProp = property $ do + bs <- forAllByteString 0 512 + let lhs = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () 0 + ] + evaluatesToConstant bs lhs + compProp :: Property + compProp = property $ do + bs <- forAllByteString 0 512 + i <- forAll . Gen.integral . Range.linear (negate 512) $ 512 + j <- forAll . Gen.integral . Range.linear (negate 512) $ 512 + let lhsInner = mkIterAppNoAnn (builtin () PLC.AddInteger) [ + mkConstant @Integer () i, + mkConstant @Integer () j + ] + let lhs = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + lhsInner + ] + let rhsInner = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + let rhs = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + rhsInner, + mkConstant @Integer () j + ] + evaluateTheSame lhs rhs + +-- | There should exist a monoid homomorphism between bytestring concatenation and natural number +-- addition. +csbHomomorphism :: [TestTree] +csbHomomorphism = [ + testCase "count of empty is zero" $ do + let lhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + mkConstant @ByteString () "" + ] + assertEvaluatesToConstant @Integer 0 lhs, + testPropertyNamed "count of concat is addition" "concat_count_plus" compProp + ] + where + compProp :: Property + compProp = property $ do + bs1 <- forAllByteString 0 512 + bs2 <- forAllByteString 0 512 + let lhsInner = mkIterAppNoAnn (builtin () PLC.AppendByteString) [ + mkConstant @ByteString () bs1, + mkConstant @ByteString () bs2 + ] + let lhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + lhsInner + ] + let rhsLeft = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + mkConstant @ByteString () bs1 + ] + let rhsRight = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + mkConstant @ByteString () bs2 + ] + let rhs = mkIterAppNoAnn (builtin () PLC.AddInteger) [ + rhsLeft, + rhsRight + ] + evaluateTheSame lhs rhs + +-- | Shifting by more than the bit length (either positive or negative) clears the result. +shiftClear :: Property +shiftClear = property $ do + bs <- forAllByteString 0 512 + let bitLen = 8 * BS.length bs + i <- forAll . Gen.integral . Range.linear (negate 256) $ 256 + adjustment <- case signum i of + (-1) -> pure $ negate bitLen + i + -- Here, we shift by the length exactly, so we randomly pick negative or positive + 0 -> forAll . Gen.element $ [bitLen, negate bitLen] + _ -> pure $ bitLen + i + let lhs = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (fromIntegral adjustment) + ] + let rhsInner = mkIterAppNoAnn (builtin () PLC.LengthOfByteString) [ + mkConstant @ByteString () bs + ] + let rhs = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ + rhsInner, + mkConstant @Integer () 0 + ] + evaluateTheSame lhs rhs + +-- | Positive shifts clear low-index bits. +shiftPosClearLow :: Property +shiftPosClearLow = property $ do + bs <- forAllByteString 1 512 + let bitLen = 8 * BS.length bs + n <- forAll . Gen.integral . Range.linear 1 $ bitLen - 1 + i <- forAll . Gen.integral . 
Range.linear 0 $ n - 1 + let lhsInner = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (fromIntegral n) + ] + let lhs = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + lhsInner, + mkConstant @Integer () (fromIntegral i) + ] + evaluatesToConstant False lhs + +-- | Negative shifts clear high-index bits. +shiftNegClearHigh :: Property +shiftNegClearHigh = property $ do + bs <- forAllByteString 1 512 + let bitLen = 8 * BS.length bs + n <- forAll . Gen.integral . Range.linear 1 $ bitLen - 1 + i <- forAll . Gen.integral . Range.linear 0 $ n - 1 + let lhsInner = mkIterAppNoAnn (builtin () PLC.ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (fromIntegral . negate $ n) + ] + let lhs = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + lhsInner, + mkConstant @Integer () (fromIntegral $ bitLen - i - 1) + ] + evaluatesToConstant False lhs + +-- | Rotations by more than the bit length 'roll over' bits. +rotateRollover :: Property +rotateRollover = property $ do + bs <- forAllByteString 0 512 + let bitLen = 8 * BS.length bs + i <- forAll . Gen.integral . Range.linear (negate 512) $ 512 + let lhs = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (case signum i of + (-1) -> (negate . fromIntegral $ bitLen) + i + _ -> fromIntegral bitLen + i) + ] + let rhs = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + evaluateTheSame lhs rhs + +-- | Rotations move bits, but don't change them. +rotateMoveBits :: Property +rotateMoveBits = property $ do + bs <- forAllByteString 1 512 + let bitLen = 8 * BS.length bs + i <- forAll . Gen.integral . Range.linear 0 $ bitLen - 1 + j <- forAll . Gen.integral . Range.linear (negate 256) $ 256 + let lhs = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (fromIntegral i) + ] + let rhsRotation = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (fromIntegral j) + ] + let rhsIndex = mkIterAppNoAnn (builtin () PLC.ModInteger) [ + mkConstant @Integer () (fromIntegral $ i + j), + mkConstant @Integer () (fromIntegral bitLen) + ] + let rhs = mkIterAppNoAnn (builtin () PLC.ReadBit) [ + rhsRotation, + rhsIndex + ] + evaluateTheSame lhs rhs + +-- | Rotations do not change how many set (and clear) bits there are. +csbRotate :: Property +csbRotate = property $ do + bs <- forAllByteString 0 512 + i <- forAll . Gen.integral . 
Range.linear (negate 512) $ 512 + let lhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + mkConstant @ByteString () bs + ] + let rhsInner = mkIterAppNoAnn (builtin () PLC.RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + let rhs = mkIterAppNoAnn (builtin () PLC.CountSetBits) [ + rhsInner + ] + evaluateTheSame lhs rhs diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Conversion.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Conversion.hs index ba5929d7ff1..82653a7b600 100644 --- a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Conversion.hs +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Conversion.hs @@ -21,7 +21,7 @@ module Evaluation.Builtins.Conversion ( import Evaluation.Builtins.Common (typecheckEvaluateCek) import PlutusCore qualified as PLC -import PlutusCore.Bitwise.Convert (integerToByteStringMaximumOutputLength) +import PlutusCore.Bitwise (integerToByteStringMaximumOutputLength) import PlutusCore.Evaluation.Machine.ExBudgetingDefaults (defaultBuiltinCostModelForTesting) import PlutusCore.MkPlc (builtin, mkConstant, mkIterAppNoAnn) import PlutusPrelude (Word8, def) diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs index 060fc0f2b35..fab83cf11a4 100644 --- a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs @@ -28,6 +28,8 @@ import PlutusCore.Pretty import PlutusPrelude import UntypedPlutusCore.Evaluation.Machine.Cek +import Evaluation.Builtins.Bitwise qualified as Bitwise +import Hedgehog hiding (Opaque, Size, Var) import PlutusCore qualified as PLC import PlutusCore.Examples.Builtins import PlutusCore.Examples.Data.Data @@ -54,7 +56,6 @@ import Evaluation.Builtins.Laws qualified as Laws import Evaluation.Builtins.SignatureVerification (ecdsaSecp256k1Prop, ed25519_VariantAProp, ed25519_VariantBProp, ed25519_VariantCProp, schnorrSecp256k1Prop) -import Hedgehog hiding (Opaque, Size, Var) import Hedgehog.Gen qualified as Gen import Hedgehog.Range qualified as Range import Prettyprinter (vsep) @@ -887,7 +888,7 @@ cons = mkConstant () -- Test that the SECP256k1 builtins are behaving correctly test_SignatureVerification :: TestTree test_SignatureVerification = - adjustOption (\x -> max x . HedgehogTestLimit . Just $ 8000) . + adjustOption (\x -> max x . HedgehogTestLimit . Just $ 4000) . testGroup "Signature verification" $ [ testGroup "Ed25519 signatures (VariantA)" [ testPropertyNamed @@ -922,7 +923,7 @@ test_SignatureVerification = -- Test that the Integer <-> ByteString conversion builtins are behaving correctly test_Conversion :: TestTree test_Conversion = - adjustOption (\x -> max x . HedgehogTestLimit . Just $ 8000) . + adjustOption (\x -> max x . HedgehogTestLimit . Just $ 4000) . testGroup "Integer <-> ByteString conversions" $ [ testGroup "Integer -> ByteString" [ --- lengthOfByteString (integerToByteString e d 0) = d @@ -958,10 +959,55 @@ test_Conversion = ] ] +-- Tests of the laws from [this +-- CIP](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-XXXX/CIP-XXXX.md) +test_Bitwise :: TestTree +test_Bitwise = + adjustOption (\x -> max x . HedgehogTestLimit . Just $ 4000) . 
+ testGroup "Bitwise" $ [ + testGroup "shiftByteString" [ + testGroup "homomorphism" Bitwise.shiftHomomorphism, + testPropertyNamed "shifts over bit length clear input" "shift_too_much" + Bitwise.shiftClear, + testPropertyNamed "positive shifts clear low indexes" "shift_pos_low" + Bitwise.shiftPosClearLow, + testPropertyNamed "negative shifts clear high indexes" "shift_neg_high" + Bitwise.shiftNegClearHigh + ], + testGroup "rotateByteString" [ + testGroup "homomorphism" Bitwise.rotateHomomorphism, + testPropertyNamed "rotations over bit length roll over" "rotate_too_much" + Bitwise.rotateRollover, + testPropertyNamed "rotations move bits but don't change them" "rotate_move" + Bitwise.rotateMoveBits + ], + testGroup "countSetBits" [ + testGroup "homomorphism" Bitwise.csbHomomorphism, + testPropertyNamed "rotation preserves count" "popcount_rotate" + Bitwise.csbRotate, + testPropertyNamed "count of the complement" "popcount_complement" + Bitwise.csbComplement, + testPropertyNamed "inclusion-exclusion" "popcount_inclusion_exclusion" + Bitwise.csbInclusionExclusion, + testPropertyNamed "count of self-XOR" "popcount_self_xor" + Bitwise.csbXor + ], + testGroup "findFirstSetBit" [ + testPropertyNamed "find first in zero bytestrings" "ffs_zero" + Bitwise.ffsZero, + testPropertyNamed "find first in replicated" "ffs_replicate" + Bitwise.ffsReplicate, + testPropertyNamed "find first of self-XOR" "ffs_xor" + Bitwise.ffsXor, + testPropertyNamed "found index set, lower indices clear" "ffs_index" + Bitwise.ffsIndex + ] + ] + -- Tests for the logical operations, as per [CIP-122](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md) test_Logical :: TestTree test_Logical = - adjustOption (\x -> max x . HedgehogTestLimit . Just $ 4000) . + adjustOption (\x -> max x . HedgehogTestLimit . Just $ 2000) . 
testGroup "Logical" $ [ testGroup "andByteString" [ Laws.abelianSemigroupLaws "truncation" PLC.AndByteString False, @@ -988,7 +1034,7 @@ test_Logical = Laws.xorInvoluteLaw, Laws.abelianMonoidLaws "padding" PLC.XorByteString True "" ], - testGroup "bitwiseLogicalComplement" [ + testGroup "complementByteString" [ Laws.complementSelfInverse, Laws.deMorgan ], @@ -998,7 +1044,7 @@ test_Logical = Laws.setSet, Laws.writeBitsHomomorphismLaws ], - testGroup "replicateByteString" [ + testGroup "replicateByte" [ Laws.replicateHomomorphismLaws, Laws.replicateIndex ] @@ -1042,4 +1088,5 @@ test_definition = , test_ConsByteString , test_Conversion , test_Logical + , test_Bitwise ] diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Laws.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Laws.hs index a7bbe8021ea..cce4f034f9d 100644 --- a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Laws.hs +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Laws.hs @@ -22,17 +22,14 @@ module Evaluation.Builtins.Laws ( import Data.ByteString (ByteString) import Data.ByteString qualified as BS -import Evaluation.Builtins.Common (typecheckEvaluateCek, typecheckReadKnownCek) +import Evaluation.Helpers (evaluateTheSame, evaluateToHaskell, evaluatesToConstant, + forAllByteString) import GHC.Exts (fromString) -import Hedgehog (Gen, Property, PropertyT, annotateShow, failure, forAll, forAllWith, property, - (===)) +import Hedgehog (Gen, Property, PropertyT, forAll, forAllWith, property) import Hedgehog.Gen qualified as Gen import Hedgehog.Range qualified as Range -import Numeric (showHex) import PlutusCore qualified as PLC -import PlutusCore.Evaluation.Machine.ExBudgetingDefaults (defaultBuiltinCostModelForTesting) import PlutusCore.MkPlc (builtin, mkConstant, mkIterAppNoAnn) -import PlutusPrelude (Word8, def) import Test.Tasty (TestTree, testGroup) import Test.Tasty.Hedgehog (testPropertyNamed) import UntypedPlutusCore qualified as UPLC @@ -41,10 +38,10 @@ import UntypedPlutusCore qualified as UPLC -- every valid index, namely the byte specified. replicateIndex :: TestTree replicateIndex = testPropertyNamed "every byte is the same" "replicate_all_match" . property $ do - n <- forAll . Gen.integral . Range.linear 1 $ 1024 + n <- forAll . Gen.integral . Range.linear 1 $ 512 b <- forAll . Gen.integral . Range.constant 0 $ 255 i <- forAll . Gen.integral . Range.linear 0 $ n - 1 - let lhsInner = mkIterAppNoAnn (builtin () PLC.ReplicateByteString) [ + let lhsInner = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ mkConstant @Integer () n, mkConstant @Integer () b ] @@ -52,43 +49,32 @@ replicateIndex = testPropertyNamed "every byte is the same" "replicate_all_match lhsInner, mkConstant @Integer () i ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsInteger) [ - lhs, - mkConstant @Integer () b - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant @Integer b lhs -- | If you retrieve a bit value at an index, then write that same value to -- the same index, nothing should happen. getSet :: TestTree getSet = testPropertyNamed "get-set" "get_set" . 
property $ do - bs <- forAllByteString1 + bs <- forAllByteString 1 512 i <- forAllIndexOf bs let lookupExp = mkIterAppNoAnn (builtin () PLC.ReadBit) [ mkConstant @ByteString () bs, mkConstant @Integer () i ] - case typecheckReadKnownCek def defaultBuiltinCostModelForTesting lookupExp of - Left err -> annotateShow err >> failure - Right (Left err) -> annotateShow err >> failure - Right (Right b) -> do - let lhs = mkIterAppNoAnn (builtin () PLC.WriteBits) [ - mkConstant @ByteString () bs, - mkConstant @[(Integer, Bool)] () [(i, b)] - ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + b <- evaluateToHaskell lookupExp + let lhs = mkIterAppNoAnn (builtin () PLC.WriteBits) [ + mkConstant @ByteString () bs, + mkConstant @[(Integer, Bool)] () [(i, b)] + ] + evaluatesToConstant bs lhs -- | If you write a bit value to an index, then retrieve the bit value at the -- same index, you should get back what you wrote. setGet :: TestTree setGet = testPropertyNamed "set-get" "set_get" . property $ do - bs <- forAllByteString1 + bs <- forAllByteString 1 512 i <- forAllIndexOf bs b <- forAll Gen.bool let lhsInner = mkIterAppNoAnn (builtin () PLC.WriteBits) [ @@ -99,13 +85,13 @@ setGet = lhsInner, mkConstant @Integer () i ] - evaluateAndVerify (mkConstant @Bool () b) lhs + evaluatesToConstant b lhs -- | If you write twice to the same bit index, the second write should win. setSet :: TestTree setSet = testPropertyNamed "set-set" "set_set" . property $ do - bs <- forAllByteString1 + bs <- forAllByteString 1 512 i <- forAllIndexOf bs b1 <- forAll Gen.bool b2 <- forAll Gen.bool @@ -117,11 +103,7 @@ setSet = mkConstant @ByteString () bs, mkConstant @[(Integer, Bool)] () [(i, b2)] ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs -- | Checks that: -- @@ -137,19 +119,15 @@ writeBitsHomomorphismLaws = where identityProp :: Property identityProp = property $ do - bs <- forAllByteString1 + bs <- forAllByteString 1 512 let lhs = mkIterAppNoAnn (builtin () PLC.WriteBits) [ mkConstant @ByteString () bs, mkConstant @[(Integer, Bool)] () [] ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant bs lhs compositionProp :: Property compositionProp = property $ do - bs <- forAllByteString1 + bs <- forAllByteString 1 512 changelist1 <- forAllChangelistOf bs changelist2 <- forAllChangelistOf bs let lhsInner = mkIterAppNoAnn (builtin () PLC.WriteBits) [ @@ -164,11 +142,7 @@ writeBitsHomomorphismLaws = mkConstant @ByteString () bs, mkConstant @[(Integer, Bool)] () (changelist1 <> changelist2) ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs -- | Checks that: -- @@ -186,25 +160,21 @@ replicateHomomorphismLaws = identityProp :: Property identityProp = property $ do b <- forAll . Gen.integral . 
Range.constant 0 $ 255 - let lhs = mkIterAppNoAnn (builtin () PLC.ReplicateByteString) [ + let lhs = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ mkConstant @Integer () 0, mkConstant @Integer () b ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () "" - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant @ByteString "" lhs compositionProp :: Property compositionProp = property $ do b <- forAll . Gen.integral . Range.constant 0 $ 255 n1 <- forAll . Gen.integral . Range.linear 0 $ 512 n2 <- forAll . Gen.integral . Range.linear 0 $ 512 - let lhsInner1 = mkIterAppNoAnn (builtin () PLC.ReplicateByteString) [ + let lhsInner1 = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ mkConstant @Integer () n1, mkConstant @Integer () b ] - let lhsInner2 = mkIterAppNoAnn (builtin () PLC.ReplicateByteString) [ + let lhsInner2 = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ mkConstant @Integer () n2, mkConstant @Integer () b ] @@ -212,32 +182,24 @@ replicateHomomorphismLaws = lhsInner1, lhsInner2 ] - let rhs = mkIterAppNoAnn (builtin () PLC.ReplicateByteString) [ + let rhs = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [ mkConstant @Integer () (n1 + n2), mkConstant @Integer () b ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs -- | If you complement a 'ByteString' twice, nothing should change. complementSelfInverse :: TestTree complementSelfInverse = testPropertyNamed "self-inverse" "self_inverse" . property $ do - bs <- forAllByteString + bs <- forAllByteString 0 512 let lhsInner = mkIterAppNoAnn (builtin () PLC.ComplementByteString) [ mkConstant @ByteString () bs ] let lhs = mkIterAppNoAnn (builtin () PLC.ComplementByteString) [ lhsInner ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant bs lhs -- | Checks that: -- @@ -252,8 +214,8 @@ deMorgan = testGroup "De Morgan's laws" [ go :: UPLC.DefaultFun -> UPLC.DefaultFun -> Property go f g = property $ do semantics <- forAllWith showSemantics Gen.bool - bs1 <- forAllByteString - bs2 <- forAllByteString + bs1 <- forAllByteString 0 512 + bs2 <- forAllByteString 0 512 let lhsInner = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () semantics, mkConstant @ByteString () bs1, @@ -273,16 +235,12 @@ deMorgan = testGroup "De Morgan's laws" [ rhsInner1, rhsInner2 ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs -- | If you XOR any 'ByteString' with itself twice, nothing should change. xorInvoluteLaw :: TestTree xorInvoluteLaw = testPropertyNamed "involute (both)" "involute_both" . property $ do - bs <- forAllByteString + bs <- forAllByteString 0 512 semantics <- forAllWith showSemantics Gen.bool let lhsInner = mkIterAppNoAnn (builtin () PLC.XorByteString) [ mkConstant @Bool () semantics, @@ -294,11 +252,7 @@ xorInvoluteLaw = testPropertyNamed "involute (both)" "involute_both" . 
property mkConstant @ByteString () bs, lhsInner ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant bs lhs -- | Checks that the first 'DefaultFun' distributes over the second from the -- left, given the specified semantics (as a 'Bool'). More precisely, for @@ -346,17 +300,13 @@ idempotenceLaw name f isPadding = where idempProp :: Property idempProp = property $ do - bs <- forAllByteString + bs <- forAllByteString 0 512 let lhs = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () bs, mkConstant @ByteString () bs ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant bs lhs -- | Checks that the provided 'ByteString' is an absorbing element for the -- given 'DefaultFun', under the given semantics. Specifically, given @f@ @@ -370,17 +320,13 @@ absorbtionLaw name f isPadding absorber = where absorbProp :: Property absorbProp = property $ do - bs <- forAllByteString + bs <- forAllByteString 0 512 let lhs = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () bs, mkConstant @ByteString () absorber ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - mkConstant @ByteString () absorber, - lhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluatesToConstant absorber lhs -- Helpers @@ -391,9 +337,9 @@ showSemantics b = if b leftDistProp :: UPLC.DefaultFun -> UPLC.DefaultFun -> Bool -> Property leftDistProp f distOp isPadding = property $ do - x <- forAllByteString - y <- forAllByteString - z <- forAllByteString + x <- forAllByteString 0 512 + y <- forAllByteString 0 512 + z <- forAllByteString 0 512 let distLhs = mkIterAppNoAnn (builtin () distOp) [ mkConstant @Bool () isPadding, mkConstant @ByteString () y, @@ -419,17 +365,13 @@ leftDistProp f distOp isPadding = property $ do distRhs1, distRhs2 ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs rightDistProp :: UPLC.DefaultFun -> Bool -> Property rightDistProp f isPadding = property $ do - x <- forAllByteString - y <- forAllByteString - z <- forAllByteString + x <- forAllByteString 0 512 + y <- forAllByteString 0 512 + z <- forAllByteString 0 512 let lhsInner = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () x, @@ -455,16 +397,12 @@ rightDistProp f isPadding = property $ do rhsInner1, rhsInner2 ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs commProp :: UPLC.DefaultFun -> Bool -> Property commProp f isPadding = property $ do - data1 <- forAllByteString - data2 <- forAllByteString + data1 <- forAllByteString 0 512 + data2 <- forAllByteString 0 512 let lhs = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () data1, @@ -475,17 +413,13 @@ commProp f isPadding = property $ do mkConstant @ByteString () data2, mkConstant @ByteString () data1 ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs assocProp :: UPLC.DefaultFun -> Bool 
-> Property assocProp f isPadding = property $ do - data1 <- forAllByteString - data2 <- forAllByteString - data3 <- forAllByteString + data1 <- forAllByteString 0 512 + data2 <- forAllByteString 0 512 + data3 <- forAllByteString 0 512 let data12 = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () data1, @@ -506,31 +440,17 @@ assocProp f isPadding = property $ do mkConstant @ByteString () data1, data23 ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - rhs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp + evaluateTheSame lhs rhs unitProp :: UPLC.DefaultFun -> Bool -> ByteString -> Property unitProp f isPadding unit = property $ do - bs <- forAllByteString + bs <- forAllByteString 0 512 let lhs = mkIterAppNoAnn (builtin () f) [ mkConstant @Bool () isPadding, mkConstant @ByteString () bs, mkConstant @ByteString () unit ] - let compareExp = mkIterAppNoAnn (builtin () PLC.EqualsByteString) [ - lhs, - mkConstant @ByteString () bs - ] - evaluateAndVerify (mkConstant @Bool () True) compareExp - -forAllByteString :: PropertyT IO ByteString -forAllByteString = forAllWith hexShow . Gen.bytes . Range.linear 0 $ 1024 - -forAllByteString1 :: PropertyT IO ByteString -forAllByteString1 = forAllWith hexShow . Gen.bytes . Range.linear 1 $ 1024 + evaluatesToConstant bs lhs forAllIndexOf :: ByteString -> PropertyT IO Integer forAllIndexOf bs = forAll . Gen.integral . Range.linear 0 . fromIntegral $ BS.length bs * 8 - 1 @@ -543,23 +463,3 @@ forAllChangelistOf bs = len = BS.length bs genIndex :: Gen Integer genIndex = Gen.integral . Range.linear 0 . fromIntegral $ len * 8 - 1 - -hexShow :: ByteString -> String -hexShow = ("0x" <>) . BS.foldl' (\acc w8 -> acc <> byteToHex w8) "" - where - byteToHex :: Word8 -> String - byteToHex w8 - | w8 < 128 = "0" <> showHex w8 "" - | otherwise = showHex w8 "" - -evaluateAndVerify :: - UPLC.Term UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () -> - PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () -> - PropertyT IO () -evaluateAndVerify expected actual = - case typecheckEvaluateCek def defaultBuiltinCostModelForTesting actual of - Left x -> annotateShow x >> failure - Right (res, logs) -> case res of - PLC.EvaluationFailure -> annotateShow logs >> failure - PLC.EvaluationSuccess r -> r === expected - diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Helpers.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Helpers.hs new file mode 100644 index 00000000000..46a18553fed --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Helpers.hs @@ -0,0 +1,136 @@ +-- editorconfig-checker-disable-file +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE LambdaCase #-} + +-- | Various helpers for defining evaluation tests. 
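+--
+-- As a usage sketch (the property name and the exact ranges here are illustrative, not taken from
+-- the test suite), a typical property generates inputs, builds a term, and checks its evaluation:
+--
+-- > zeroCountProp :: Property
+-- > zeroCountProp = property $ do
+-- >   n <- forAll . Gen.integral . Range.linear 0 $ 512
+-- >   let inner = mkIterAppNoAnn (builtin () PLC.ReplicateByte) [
+-- >         mkConstant @Integer () n,
+-- >         mkConstant @Integer () 0
+-- >         ]
+-- >   let term = mkIterAppNoAnn (builtin () PLC.CountSetBits) [inner]
+-- >   evaluatesToConstant @Integer 0 term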
+module Evaluation.Helpers (
+  -- * Generators
+  forAllByteString,
+  forAllByteStringThat,
+  -- * Evaluation helpers
+  evaluateTheSame,
+  evaluatesToConstant,
+  assertEvaluatesToConstant,
+  evaluateToHaskell,
+  ) where
+
+import Data.ByteString (ByteString)
+import Data.ByteString qualified as BS
+import Data.Kind (Type)
+import Evaluation.Builtins.Common (typecheckEvaluateCek, typecheckReadKnownCek)
+import GHC.Stack (HasCallStack)
+import Hedgehog (PropertyT, annotateShow, failure, forAllWith, (===))
+import Hedgehog.Gen qualified as Gen
+import Hedgehog.Range qualified as Range
+import Numeric (showHex)
+import PlutusCore qualified as PLC
+import PlutusCore.Builtin (ReadKnownIn)
+import PlutusCore.Evaluation.Machine.ExBudgetingDefaults (defaultBuiltinCostModelForTesting)
+import PlutusCore.MkPlc (mkConstant)
+import PlutusPrelude (Word8, def)
+import Test.Tasty.HUnit (assertEqual, assertFailure)
+import UntypedPlutusCore qualified as UPLC
+
+-- | Given a lower and upper bound (both inclusive) on length, generate a 'ByteString' whose length
+-- falls within these bounds. Furthermore, the generated 'ByteString' will show as a list of
+-- hex-encoded bytes on a failure, instead of the default 'Show' output.
+--
+-- = Note
+--
+-- It is the caller's responsibility to ensure that the bounds are sensible: that is, that neither
+-- the upper nor the lower bound is negative, and that the lower bound is not greater than the
+-- upper bound.
+forAllByteString :: forall (m :: Type -> Type) .
+  (Monad m, HasCallStack) =>
+  Int -> Int -> PropertyT m ByteString
+forAllByteString lo = forAllWith hexShow . Gen.bytes . Range.linear lo
+
+-- | As 'forAllByteString', but with a postcondition.
+--
+-- = Note
+--
+-- If the postcondition is unlikely, the generator may eventually fail after too many retries.
+-- Ensure that the postcondition is likely to hold, to avoid this problem.
+forAllByteStringThat :: forall (m :: Type -> Type) .
+  (Monad m, HasCallStack) =>
+  (ByteString -> Bool) -> Int -> Int -> PropertyT m ByteString
+forAllByteStringThat p lo = forAllWith hexShow . Gen.filterT p . Gen.bytes . Range.linear lo
+
+-- | Typechecks and evaluates both PLC expressions. If either of them fails to typecheck, fail the
+-- test, noting what the failure was. If both typecheck, but either errors when run, fail the test,
+-- noting the log(s) for any failing expression. If both run without error, compare the results
+-- using '==='.
+evaluateTheSame ::
+  HasCallStack =>
+  PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () ->
+  PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () ->
+  PropertyT IO ()
+evaluateTheSame lhs rhs =
+  case typecheckEvaluateCek def defaultBuiltinCostModelForTesting lhs of
+    Left x -> annotateShow x >> failure
+    Right (resLhs, logsLhs) -> case typecheckEvaluateCek def defaultBuiltinCostModelForTesting rhs of
+      Left x -> annotateShow x >> failure
+      Right (resRhs, logsRhs) -> case (resLhs, resRhs) of
+        (PLC.EvaluationFailure, PLC.EvaluationFailure) -> do
+          annotateShow logsLhs
+          annotateShow logsRhs
+          failure
+        (PLC.EvaluationSuccess rLhs, PLC.EvaluationSuccess rRhs) -> rLhs === rRhs
+        (PLC.EvaluationFailure, _) -> annotateShow logsLhs >> failure
+        (_, PLC.EvaluationFailure) -> annotateShow logsRhs >> failure
+
+-- | As 'evaluateTheSame', but for cases where we want to compare a more complex computation to a
+-- constant (as if by @mkConstant@). This is slightly more efficient.
+evaluatesToConstant :: forall (a :: Type) .
+  PLC.Contains UPLC.DefaultUni a =>
+  a ->
+  PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () ->
+  PropertyT IO ()
+evaluatesToConstant k expr =
+  case typecheckEvaluateCek def defaultBuiltinCostModelForTesting expr of
+    Left err -> annotateShow err >> failure
+    Right (res, logs) -> case res of
+      PLC.EvaluationFailure -> annotateShow logs >> failure
+      PLC.EvaluationSuccess r -> r === mkConstant () k
+
+-- | Given a PLC expression and an intended type (via a type argument), typecheck the expression,
+-- evaluate it, then produce the required Haskell value from the results. If we fail at any stage,
+-- instead fail the test and report the failure.
+evaluateToHaskell :: forall (a :: Type) .
+  ReadKnownIn UPLC.DefaultUni (UPLC.Term UPLC.Name UPLC.DefaultUni UPLC.DefaultFun ()) a =>
+  PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () ->
+  PropertyT IO a
+evaluateToHaskell expr =
+  case typecheckReadKnownCek def defaultBuiltinCostModelForTesting expr of
+    Left err -> annotateShow err >> failure
+    Right (Left err) -> annotateShow err >> failure
+    Right (Right x) -> pure x
+
+-- | As 'evaluatesToConstant', but for a unit instead of a property.
+assertEvaluatesToConstant :: forall (a :: Type) .
+  PLC.Contains UPLC.DefaultUni a =>
+  a ->
+  PLC.Term UPLC.TyName UPLC.Name UPLC.DefaultUni UPLC.DefaultFun () ->
+  IO ()
+assertEvaluatesToConstant k expr =
+  case typecheckEvaluateCek def defaultBuiltinCostModelForTesting expr of
+    Left err -> assertFailure . show $ err
+    Right (res, logs) -> case res of
+      PLC.EvaluationFailure -> assertFailure . show $ logs
+      PLC.EvaluationSuccess r -> assertEqual "" r (mkConstant () k)
+
+-- Helpers
+
+hexShow :: ByteString -> String
+hexShow bs = "[" <> (go . BS.unpack $ bs) <> "]"
+  where
+    go :: [Word8] -> String
+    go = \case
+      [] -> ""
+      [w8] -> byteToHex w8
+      (w8 : w8s) -> byteToHex w8 <> ", " <> go w8s
+
+byteToHex :: Word8 -> String
+byteToHex w8
+  | w8 < 16 = "0x0" <> showHex w8 "" -- pad single-digit bytes to a full two hex digits
+  | otherwise = "0x" <> showHex w8 ""
diff --git a/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs b/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs
index 4a68bb38c4c..bdec21ac37d 100644
--- a/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs
+++ b/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs
@@ -121,7 +121,8 @@ builtinsIntroducedIn = Map.fromList [
   ]),
   ((PlutusV3, futurePV), Set.fromList [
     AndByteString, OrByteString, XorByteString, ComplementByteString,
-    ReadBit, WriteBits, ReplicateByteString
+    ReadBit, WriteBits, ReplicateByte,
+    ShiftByteString, RotateByteString, CountSetBits, FindFirstSetBit
   ])
   ]
 
diff --git a/plutus-metatheory/src/Builtin.lagda.md b/plutus-metatheory/src/Builtin.lagda.md
index f216d83f945..ac170bb5e2c 100644
--- a/plutus-metatheory/src/Builtin.lagda.md
+++ b/plutus-metatheory/src/Builtin.lagda.md
@@ -535,7 +535,7 @@ postulate
 {-# COMPILE GHC KECCAK-256 = Hash.keccak_256 #-}
 {-# COMPILE GHC BLAKE2B-224 = Hash.blake2b_224 #-}
 
-{-# FOREIGN GHC import PlutusCore.Bitwise.Convert qualified as Convert #-}
+{-# FOREIGN GHC import PlutusCore.Bitwise qualified as Convert #-}
 {-# COMPILE GHC BStoI = Convert.byteStringToIntegerWrapper #-}
 {-# COMPILE GHC ItoBS = \e w n -> builtinResultToMaybe $ Convert.integerToByteStringWrapper e w n #-}
 
diff --git a/plutus-tx-plugin/src/PlutusTx/Compiler/Builtins.hs b/plutus-tx-plugin/src/PlutusTx/Compiler/Builtins.hs
index c8741b870ee..297f3297d41 100644
--- a/plutus-tx-plugin/src/PlutusTx/Compiler/Builtins.hs
+++ b/plutus-tx-plugin/src/PlutusTx/Compiler/Builtins.hs
@@ -283,7 +283,12 @@ builtinNames = [
     , 'Builtins.complementByteString
     , 'Builtins.readBit
     , 'Builtins.writeBits
-    , 'Builtins.replicateByteString
+    , 'Builtins.replicateByte
+
+    , 'Builtins.shiftByteString
+    , 'Builtins.rotateByteString
+    , 'Builtins.countSetBits
+    , 'Builtins.findFirstSetBit
     ]
 
 defineBuiltinTerm :: CompilingDefault uni fun m ann => Ann -> TH.Name -> PIRTerm uni fun -> m ()
@@ -448,7 +453,13 @@ defineBuiltinTerms = do
       PLC.ComplementByteString -> defineBuiltinInl 'Builtins.complementByteString
       PLC.ReadBit -> defineBuiltinInl 'Builtins.readBit
       PLC.WriteBits -> defineBuiltinInl 'Builtins.writeBits
-      PLC.ReplicateByteString -> defineBuiltinInl 'Builtins.replicateByteString
+      PLC.ReplicateByte -> defineBuiltinInl 'Builtins.replicateByte
+
+      -- Other bitwise ops
+      PLC.ShiftByteString -> defineBuiltinInl 'Builtins.shiftByteString
+      PLC.RotateByteString -> defineBuiltinInl 'Builtins.rotateByteString
+      PLC.CountSetBits -> defineBuiltinInl 'Builtins.countSetBits
+      PLC.FindFirstSetBit -> defineBuiltinInl 'Builtins.findFirstSetBit
 
 defineBuiltinTypes
     :: CompilingDefault uni fun m ann
diff --git a/plutus-tx-plugin/test/Budget/9.6/map2.uplc.golden b/plutus-tx-plugin/test/Budget/9.6/map2.uplc.golden
index e2e0f98905e..f1bf99b0f21 100644
--- a/plutus-tx-plugin/test/Budget/9.6/map2.uplc.golden
+++ b/plutus-tx-plugin/test/Budget/9.6/map2.uplc.golden
@@ -212,8 +212,8 @@ program
                           [ (addInteger 7 n) , #534556454e ])
                        , (constr 0 []) ]) ]) ]) ])))
-          (addInteger 3 n))
-         (addInteger 4 n))
+          (addInteger 4 n))
+         (addInteger 3 n))
       (\`$dToData` `$dToData` ->
          (\go eta -> goList (go eta))
            (fix1
diff --git a/plutus-tx-plugin/test/Budget/9.6/map3.uplc.golden b/plutus-tx-plugin/test/Budget/9.6/map3.uplc.golden
index e2e0f98905e..f1bf99b0f21 100644
--- a/plutus-tx-plugin/test/Budget/9.6/map3.uplc.golden
+++ b/plutus-tx-plugin/test/Budget/9.6/map3.uplc.golden
@@ -212,8 +212,8 @@ program
                           [ (addInteger 7 n) , #534556454e ])
                        , (constr 0 []) ]) ]) ]) ])))
-          (addInteger 3 n))
-         (addInteger 4 n))
+          (addInteger 4 n))
+         (addInteger 3 n))
       (\`$dToData` `$dToData` ->
          (\go eta -> goList (go eta))
            (fix1
diff --git a/plutus-tx/changelog.d/20240523_124052_koz.ross_bitwise_2.md b/plutus-tx/changelog.d/20240523_124052_koz.ross_bitwise_2.md
new file mode 100644
index 00000000000..72fea979f9c
--- /dev/null
+++ b/plutus-tx/changelog.d/20240523_124052_koz.ross_bitwise_2.md
@@ -0,0 +1,39 @@
+
+
+
+### Added
+
+- Builtin wrappers for operations from [this
+  CIP](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-XXXX/CIP-XXXX.md).
+
+### Changed
+
+- Rename `replicateByteString` to `replicateByte`
+
+
+
diff --git a/plutus-tx/src/PlutusTx/Builtins.hs b/plutus-tx/src/PlutusTx/Builtins.hs
index e242df14841..fdbe67fe750 100644
--- a/plutus-tx/src/PlutusTx/Builtins.hs
+++ b/plutus-tx/src/PlutusTx/Builtins.hs
@@ -117,7 +117,12 @@ module PlutusTx.Builtins (
   , complementByteString
   , readBit
   , writeBits
-  , replicateByteString
+  , replicateByte
+  -- * Bitwise
+  , shiftByteString
+  , rotateByteString
+  , countSetBits
+  , findFirstSetBit
   ) where
 
 import Data.Maybe
@@ -633,7 +638,7 @@ byteOrderToBool BigEndian = True
 byteOrderToBool LittleEndian = False
 
 -- | Convert a 'BuiltinInteger' into a 'BuiltinByteString', as described in
--- [CIP-121](https://github.com/mlabs-haskell/CIPs/tree/koz/to-from-bytestring/CIP-0121).
+-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121).
 -- The first argument indicates the endianness of the conversion and the third
 -- argument is the integer to be converted, which must be non-negative. The
 -- second argument must also be non-negative and it indicates the required width
@@ -651,7 +656,7 @@ integerToByteString :: ByteOrder -> Integer -> Integer -> BuiltinByteString
 integerToByteString endianness = BI.integerToByteString (toOpaque (byteOrderToBool endianness))
 
 -- | Convert a 'BuiltinByteString' to a 'BuiltinInteger', as described in
--- [CIP-121](https://github.com/mlabs-haskell/CIPs/tree/koz/to-from-bytestring/CIP-0121).
+-- [CIP-121](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0121).
 -- The first argument indicates the endianness of the conversion and the second
 -- is the bytestring to be converted. There is no limitation on the size of
 -- the bytestring. The empty bytestring is converted to the integer 0.
@@ -660,10 +665,35 @@ byteStringToInteger :: ByteOrder -> BuiltinByteString -> Integer
 byteStringToInteger endianness =
   BI.byteStringToInteger (toOpaque (byteOrderToBool endianness))
 
+-- Bitwise operations
+
+-- | Shift a 'BuiltinByteString', as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+{-# INLINEABLE shiftByteString #-}
+shiftByteString :: BuiltinByteString -> Integer -> BuiltinByteString
+shiftByteString = BI.shiftByteString
+
+-- | Rotate a 'BuiltinByteString', as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+{-# INLINEABLE rotateByteString #-}
+rotateByteString :: BuiltinByteString -> Integer -> BuiltinByteString
+rotateByteString = BI.rotateByteString
+
+-- | Count the set bits in a 'BuiltinByteString', as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+{-# INLINEABLE countSetBits #-}
+countSetBits :: BuiltinByteString -> Integer
+countSetBits = BI.countSetBits
+
+-- | Find the lowest index of a set bit in a 'BuiltinByteString', as per [CIP-123](https://github.com/mlabs-haskell/CIPs/blob/koz/bitwise/CIP-0123/README.md).
+--
+-- If given a 'BuiltinByteString' which consists only of zero bytes (including the empty
+-- 'BuiltinByteString'), this returns @-1@.
+{-# INLINEABLE findFirstSetBit #-}
+findFirstSetBit :: BuiltinByteString -> Integer
+findFirstSetBit = BI.findFirstSetBit
+
 -- Logical operations
 
--- | Perform logical AND on two 'BuiltinByteString' arguments, as described
--- [here](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bitwiselogicaland).
+-- | Perform logical AND on two 'BuiltinByteString' arguments, as described in
+-- [CIP-122](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bitwiselogicaland).
 --
 -- The first argument indicates whether padding semantics should be used or not;
 -- if 'False', truncation semantics will be used instead.
@@ -671,9 +701,9 @@ byteStringToInteger endianness = -- = See also -- -- * [Padding and truncation --- semantics](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#padding-versus-truncation-semantics) +-- semantics](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#padding-versus-truncation-semantics) -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) {-# INLINEABLE andByteString #-} andByteString :: Bool -> @@ -683,7 +713,7 @@ andByteString :: andByteString b = BI.andByteString (toOpaque b) -- | Perform logical OR on two 'BuiltinByteString' arguments, as described --- [here](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bitwiselogicalor). +-- [here](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bitwiselogicalor). -- -- The first argument indicates whether padding semantics should be used or not; -- if 'False', truncation semantics will be used instead. @@ -691,9 +721,9 @@ andByteString b = BI.andByteString (toOpaque b) -- = See also -- -- * [Padding and truncation --- semantics](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#padding-versus-truncation-semantics) +-- semantics](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#padding-versus-truncation-semantics) -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) {-# INLINEABLE orByteString #-} orByteString :: Bool -> @@ -703,7 +733,7 @@ orByteString :: orByteString b = BI.orByteString (toOpaque b) -- | Perform logical XOR on two 'BuiltinByteString' arguments, as described --- [here](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bitwiselogicalxor). +-- [here](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bitwiselogicalxor). -- -- The first argument indicates whether padding semantics should be used or not; -- if 'False', truncation semantics will be used instead. @@ -711,9 +741,9 @@ orByteString b = BI.orByteString (toOpaque b) -- = See also -- -- * [Padding and truncation --- semantics](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#padding-versus-truncation-semantics) +-- semantics](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#padding-versus-truncation-semantics) -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) {-# INLINEABLE xorByteString #-} xorByteString :: Bool -> @@ -723,12 +753,12 @@ xorByteString :: xorByteString b = BI.xorByteString (toOpaque b) -- | Perform logical complement on a 'BuiltinByteString', as described --- [here](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bitwiselogicalcomplement). +-- [here](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bitwiselogicalcomplement). 
-- -- = See also -- -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) {-# INLINEABLE complementByteString #-} complementByteString :: BuiltinByteString -> @@ -744,9 +774,9 @@ complementByteString = BI.complementByteString -- = See also -- -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) -- * [Operation --- description](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#readbit) +-- description](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#readbit) {-# INLINEABLE readBit #-} readBit :: BuiltinByteString -> @@ -763,9 +793,9 @@ readBit bs i = fromOpaque (BI.readBit bs i) -- = See also -- -- * [Bit indexing --- scheme](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#bit-indexing-scheme) +-- scheme](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#bit-indexing-scheme) -- * [Operation --- description](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#writebits) +-- description](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#writebits) {-# INLINEABLE writeBits #-} writeBits :: BuiltinByteString -> @@ -780,10 +810,10 @@ writeBits = BI.writeBits -- = See also -- -- * [Operation --- description](https://github.com/mlabs-haskell/CIPs/blob/koz/logic-ops/CIP-0122/CIP-0122.md#replicateByteString) -{-# INLINEABLE replicateByteString #-} -replicateByteString :: +-- description](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0122#replicateByteString) +{-# INLINEABLE replicateByte #-} +replicateByte :: Integer -> Integer -> BuiltinByteString -replicateByteString = BI.replicateByteString +replicateByte = BI.replicateByte diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 38da315b54c..37844dd4f57 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -32,8 +32,7 @@ import Data.Kind (Type) import Data.Text as Text (Text, empty) import Data.Text.Encoding as Text (decodeUtf8, encodeUtf8) import GHC.Generics (Generic) -import PlutusCore.Bitwise.Convert qualified as Convert -import PlutusCore.Bitwise.Logical qualified as Logical +import PlutusCore.Bitwise qualified as Bitwise import PlutusCore.Builtin (BuiltinResult (..)) import PlutusCore.Crypto.BLS12_381.G1 qualified as BLS12_381.G1 import PlutusCore.Crypto.BLS12_381.G2 qualified as BLS12_381.G2 @@ -694,7 +693,7 @@ integerToByteString -> BuiltinInteger -> BuiltinByteString integerToByteString (BuiltinBool endiannessArg) paddingArg input = - case Convert.integerToByteStringWrapper endiannessArg paddingArg input of + case Bitwise.integerToByteStringWrapper endiannessArg paddingArg input of BuiltinSuccess bs -> BuiltinByteString bs BuiltinSuccessWithLogs logs bs -> traceAll logs $ BuiltinByteString bs BuiltinFailure logs err -> traceAll (logs <> pure (display err)) $ @@ -706,7 +705,40 @@ byteStringToInteger -> BuiltinByteString -> BuiltinInteger byteStringToInteger (BuiltinBool statedEndianness) (BuiltinByteString input) = - Convert.byteStringToIntegerWrapper statedEndianness input + Bitwise.byteStringToIntegerWrapper statedEndianness input + +{- +BITWISE +-} + 
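+-- Informally, a few identities the operations wrapped below satisfy (these are exercised by the
+-- property tests in Evaluation.Builtins.Bitwise):
+--
+--   * @shiftByteString bs n@ is all-zero whenever the magnitude of @n@ is at least the bit
+--     length of @bs@
+--   * @rotateByteString bs n@ is unchanged by adding any multiple of the bit length of @bs@
+--     to @n@
+--   * @countSetBits@ maps concatenation to addition, and @countSetBits empty@ is @0@
+--   * @findFirstSetBit bs@ is @-1@ exactly when @bs@ has no set bits
+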
+{-# NOINLINE shiftByteString #-} +shiftByteString :: + BuiltinByteString -> + BuiltinInteger -> + BuiltinByteString +shiftByteString (BuiltinByteString bs) = + BuiltinByteString . Bitwise.shiftByteString bs . fromIntegral + +{-# NOINLINE rotateByteString #-} +rotateByteString :: + BuiltinByteString -> + BuiltinInteger -> + BuiltinByteString +rotateByteString (BuiltinByteString bs) = + BuiltinByteString . Bitwise.rotateByteString bs . fromIntegral + +{-# NOINLINE countSetBits #-} +countSetBits :: + BuiltinByteString -> + BuiltinInteger +countSetBits (BuiltinByteString bs) = fromIntegral . Bitwise.countSetBits $ bs + +{-# NOINLINE findFirstSetBit #-} +findFirstSetBit :: + BuiltinByteString -> + BuiltinInteger +findFirstSetBit (BuiltinByteString bs) = + fromIntegral . Bitwise.findFirstSetBit $ bs {- LOGICAL @@ -719,7 +751,7 @@ andByteString :: BuiltinByteString -> BuiltinByteString andByteString (BuiltinBool isPaddingSemantics) (BuiltinByteString data1) (BuiltinByteString data2) = - BuiltinByteString . Logical.andByteString isPaddingSemantics data1 $ data2 + BuiltinByteString . Bitwise.andByteString isPaddingSemantics data1 $ data2 {-# NOINLINE orByteString #-} orByteString :: @@ -728,7 +760,7 @@ orByteString :: BuiltinByteString -> BuiltinByteString orByteString (BuiltinBool isPaddingSemantics) (BuiltinByteString data1) (BuiltinByteString data2) = - BuiltinByteString . Logical.orByteString isPaddingSemantics data1 $ data2 + BuiltinByteString . Bitwise.orByteString isPaddingSemantics data1 $ data2 {-# NOINLINE xorByteString #-} xorByteString :: @@ -737,14 +769,14 @@ xorByteString :: BuiltinByteString -> BuiltinByteString xorByteString (BuiltinBool isPaddingSemantics) (BuiltinByteString data1) (BuiltinByteString data2) = - BuiltinByteString . Logical.xorByteString isPaddingSemantics data1 $ data2 + BuiltinByteString . Bitwise.xorByteString isPaddingSemantics data1 $ data2 {-# NOINLINE complementByteString #-} complementByteString :: BuiltinByteString -> BuiltinByteString complementByteString (BuiltinByteString bs) = - BuiltinByteString . Logical.complementByteString $ bs + BuiltinByteString . Bitwise.complementByteString $ bs {-# NOINLINE readBit #-} readBit :: @@ -752,7 +784,7 @@ readBit :: BuiltinInteger -> BuiltinBool readBit (BuiltinByteString bs) i = - case Logical.readBit bs (fromIntegral i) of + case Bitwise.readBit bs (fromIntegral i) of BuiltinFailure logs err -> traceAll (logs <> pure (display err)) $ Haskell.error "readBit errored." BuiltinSuccess b -> BuiltinBool b @@ -765,19 +797,19 @@ writeBits :: BuiltinByteString writeBits (BuiltinByteString bs) (BuiltinList xs) = let unwrapped = fmap (\(BuiltinPair (i, BuiltinBool b)) -> (i, b)) xs in - case Logical.writeBits bs unwrapped of + case Bitwise.writeBits bs unwrapped of BuiltinFailure logs err -> traceAll (logs <> pure (display err)) $ Haskell.error "writeBits errored." BuiltinSuccess bs' -> BuiltinByteString bs' BuiltinSuccessWithLogs logs bs' -> traceAll logs $ BuiltinByteString bs' -{-# NOINLINE replicateByteString #-} -replicateByteString :: +{-# NOINLINE replicateByte #-} +replicateByte :: BuiltinInteger -> BuiltinInteger -> BuiltinByteString -replicateByteString n w8 = - case Logical.replicateByteString (fromIntegral n) (fromIntegral w8) of +replicateByte n w8 = + case Bitwise.replicateByte (fromIntegral n) (fromIntegral w8) of BuiltinFailure logs err -> traceAll (logs <> pure (display err)) $ Haskell.error "byteStringReplicate errored." BuiltinSuccess bs -> BuiltinByteString bs
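
For orientation, the following is a small, self-contained reference model of the bit-level semantics that the properties above pin down. It is a sketch only, not the implementation in PlutusCore.Bitwise: the module name and all helper names (BitwiseModel, readBitModel, packBits, shiftModel, rotateModel, countSetBitsModel, findFirstSetBitModel) are invented for illustration, and the behaviour is read off the CIP-122 bit indexing scheme and the tests in Evaluation.Builtins.Bitwise.

{-# LANGUAGE ImportQualifiedPost #-}

module BitwiseModel where

import Data.Bits (popCount, setBit, testBit)
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.Word (Word8)

-- CIP-122 bit indexing: bit 0 is the least significant bit of the *last* byte.
readBitModel :: ByteString -> Int -> Bool
readBitModel bs i =
  testBit (BS.index bs (BS.length bs - 1 - (i `div` 8))) (i `mod` 8)

-- Build a bytestring of the given byte length from a bit-index predicate.
packBits :: Int -> (Int -> Bool) -> ByteString
packBits len f = BS.pack [byteAt ix | ix <- [0 .. len - 1]]
  where
    byteAt :: Int -> Word8
    byteAt ix = foldl
      (\acc b -> if f (8 * (len - 1 - ix) + b) then setBit acc b else acc)
      0
      [0 .. 7]

-- Bit i of the result is bit (i - n) of the input, or clear when out of range;
-- hence positive shifts clear low indices and negative shifts clear high ones,
-- and shifting by at least the bit length clears everything.
shiftModel :: ByteString -> Int -> ByteString
shiftModel bs n =
  packBits len (\i -> let j = i - n in j >= 0 && j < 8 * len && readBitModel bs j)
  where
    len = BS.length bs

-- As shiftModel, but source indices wrap modulo the bit length, so rotations
-- "roll over" instead of clearing. Haskell's `mod` keeps the index non-negative.
rotateModel :: ByteString -> Int -> ByteString
rotateModel bs n
  | len == 0 = bs
  | otherwise = packBits len (\i -> readBitModel bs ((i - n) `mod` (8 * len)))
  where
    len = BS.length bs

-- Total number of set bits; a monoid homomorphism from concatenation to addition.
countSetBitsModel :: ByteString -> Int
countSetBitsModel = BS.foldl' (\acc w8 -> acc + popCount w8) 0

-- Lowest set bit index, or -1 when every byte is zero (including the empty input).
findFirstSetBitModel :: ByteString -> Int
findFirstSetBitModel bs =
  case filter (readBitModel bs) [0 .. 8 * BS.length bs - 1] of
    []      -> -1
    (i : _) -> i

Under this model, the zero-shift and zero-rotation identities, the composition (homomorphism) laws, the clearing and rollover behaviours, and the counting and find-first-set properties tested above all hold by construction.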