Migrate most shrinker functions to the ir #3962

Merged
merged 40 commits on May 29, 2024
Commits
0ba943c  migrate most shrinker functions to the ir (tybug, May 1, 2024)
9e3dbc0  format (tybug, May 2, 2024)
b02a67d  temporarily hack around test_sets_of_fixed_length (tybug, May 2, 2024)
901d636  fix region deletion logic using start instead of ir_start (tybug, May 2, 2024)
50b2b13  cache called ir_tree_nodes in addition to computed ones (tybug, May 4, 2024)
d81d50e  disable dfa shrinking (tybug, May 4, 2024)
f7d0d29  track ir tree datas in the DataTree (tybug, May 5, 2024)
f826f69  Merge branch 'master' into shrinker-ir (tybug, May 9, 2024)
bc418d9  format (tybug, May 12, 2024)
12d0019  Merge branch 'master' into shrinker-ir (tybug, May 12, 2024)
172dbd9  respect forced status in datatree simulate for invalid nodes (tybug, May 22, 2024)
a1b1732  only set invalid_at when required (tybug, May 22, 2024)
627af8f  track and report misaligned shrinking counts (tybug, May 22, 2024)
870c20b  check initial value in minimize_nodes (tybug, May 22, 2024)
e88ce7e  guard against out of bounds replace_all (tybug, May 22, 2024)
aeccbc5  normalize to minimal buffer when shrinking (tybug, May 22, 2024)
b69cf17  check ir_value_permitted for all trees (tybug, May 22, 2024)
866c846  fix invalid_at tests (tybug, May 22, 2024)
d26fd9b  improve datatree printing (tybug, May 22, 2024)
1fe712e  disable dfa shrinking tests for now (tybug, May 22, 2024)
ce2a9c7  allow worse [""] shrink for test_non_trivial_json (tybug, May 22, 2024)
d9a077d  add slip shrink test (tybug, May 22, 2024)
2a96a5c  Merge branch 'master' into shrinker-ir (tybug, May 22, 2024)
9de7ef5  Merge branch 'master' into shrinker-ir (tybug, May 23, 2024)
8eaa4d4  strip out dfa shrinking logic and tests (tybug, May 25, 2024)
31c445a  add shrinker coverage tests and pragmas (tybug, May 25, 2024)
7c1f554  Merge branch 'master' into shrinker-ir (tybug, May 25, 2024)
fdeeb0d  more clear float shrink logic (tybug, May 25, 2024)
9ad13e4  add shrinking benchmark code (tybug, May 25, 2024)
1b5093b  add release notes (tybug, May 25, 2024)
674c8e6  strengthen test case, copy editing (tybug, May 26, 2024)
237332f  add TODO_BETTER_SHRINK to where we can improve shrinking (tybug, May 26, 2024)
6112e22  extract function (tybug, May 26, 2024)
a9f50c6  clarify comment (tybug, May 26, 2024)
25dbf2d  try debug (tybug, May 26, 2024)
6a981fc  refactor slightly (tybug, May 26, 2024)
41b15e3  don't permit >128 bit integers (tybug, May 27, 2024)
866256e  dont cache interesting buffer-based datas (tybug, May 27, 2024)
66f2464  minor fixes, reword release notes (tybug, May 27, 2024)
3814b54  temporarily skip stateful shrink quality tests (tybug, May 29, 2024)
82 changes: 61 additions & 21 deletions hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -777,10 +777,6 @@ def end(self, i: int) -> int:
"""Equivalent to self[i].end."""
return self.endpoints[i]

def bounds(self, i: int) -> Tuple[int, int]:
"""Equivalent to self[i].bounds."""
return (self.start(i), self.end(i))

def all_bounds(self) -> Iterable[Tuple[int, int]]:
"""Equivalent to [(b.start, b.end) for b in self]."""
prev = 0
@@ -965,7 +961,12 @@ class IRNode:
was_forced: bool = attr.ib()
index: Optional[int] = attr.ib(default=None)

def copy(self, *, with_value: IRType) -> "IRNode":
def copy(
self,
*,
with_value: Optional[IRType] = None,
with_kwargs: Optional[IRKWargsType] = None,
) -> "IRNode":
# we may want to allow this combination in the future, but for now it's
# a footgun.
assert not self.was_forced, "modifying a forced node doesn't make sense"
@@ -974,8 +975,8 @@ def copy(self, *, with_value: IRType) -> "IRNode":
# after copying.
return IRNode(
ir_type=self.ir_type,
value=with_value,
kwargs=self.kwargs,
value=self.value if with_value is None else with_value,
kwargs=self.kwargs if with_kwargs is None else with_kwargs,
was_forced=self.was_forced,
)

@@ -1048,6 +1049,16 @@ def __eq__(self, other):
and self.was_forced == other.was_forced
)

def __hash__(self):
return hash(
(
self.ir_type,
ir_value_key(self.ir_type, self.value),
ir_kwargs_key(self.ir_type, self.kwargs),
self.was_forced,
)
)

def __repr__(self):
# repr to avoid "BytesWarning: str() on a bytes instance" for bytes nodes
forced_marker = " [forced]" if self.was_forced else ""
@@ -1087,22 +1098,36 @@ def ir_value_permitted(value, ir_type, kwargs):
raise NotImplementedError(f"unhandled type {type(value)} of ir value {value}")


def ir_value_key(ir_type, v):
if ir_type == "float":
return float_to_int(v)
return v


def ir_kwargs_key(ir_type, kwargs):
if ir_type == "float":
return (
float_to_int(kwargs["min_value"]),
float_to_int(kwargs["max_value"]),
kwargs["allow_nan"],
kwargs["smallest_nonzero_magnitude"],
)
if ir_type == "integer":
return (
kwargs["min_value"],
kwargs["max_value"],
None if kwargs["weights"] is None else tuple(kwargs["weights"]),
kwargs["shrink_towards"],
)
return tuple(kwargs[key] for key in sorted(kwargs))


def ir_value_equal(ir_type, v1, v2):
if ir_type != "float":
return v1 == v2
return float_to_int(v1) == float_to_int(v2)
return ir_value_key(ir_type, v1) == ir_value_key(ir_type, v2)


def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
if ir_type != "float":
return kwargs1 == kwargs2
return (
float_to_int(kwargs1["min_value"]) == float_to_int(kwargs2["min_value"])
and float_to_int(kwargs1["max_value"]) == float_to_int(kwargs2["max_value"])
and kwargs1["allow_nan"] == kwargs2["allow_nan"]
and kwargs1["smallest_nonzero_magnitude"]
== kwargs2["smallest_nonzero_magnitude"]
)
return ir_kwargs_key(ir_type, kwargs1) == ir_kwargs_key(ir_type, kwargs2)


@dataclass_transform()
@@ -1115,16 +1140,25 @@ class ConjectureResult:
status: Status = attr.ib()
interesting_origin: Optional[InterestingOrigin] = attr.ib()
buffer: bytes = attr.ib()
blocks: Blocks = attr.ib()
# some ConjectureDatas pass through the ir and some pass through buffers.
# the ir does not drive its result through the buffer, which means blocks/examples
# may differ (I think for forced values?) even when the buffer is the same.
# I don't *think* anything was relying on anything but .buffer for result equality,
# though that assumption may be leaning on flakiness detection invariants.
#
# If we don't do this, multiple (semantically, but not pythonically) equivalent results
# get stored in the pareto front.
blocks: Blocks = attr.ib(eq=False)
output: str = attr.ib()
extra_information: Optional[ExtraInformation] = attr.ib()
has_discards: bool = attr.ib()
target_observations: TargetObservations = attr.ib()
tags: FrozenSet[StructuralCoverageTag] = attr.ib()
forced_indices: FrozenSet[int] = attr.ib(repr=False)
examples: Examples = attr.ib(repr=False)
examples: Examples = attr.ib(repr=False, eq=False)
arg_slices: Set[Tuple[int, int]] = attr.ib(repr=False)
slice_comments: Dict[Tuple[int, int], str] = attr.ib(repr=False)
invalid_at: Optional[Tuple[IRTypeName, IRKWargsType]] = attr.ib(repr=False)

index: int = attr.ib(init=False)

@@ -1977,6 +2011,7 @@ def __init__(
self.extra_information = ExtraInformation()

self.ir_tree_nodes = ir_tree_prefix
self.invalid_at: Optional[Tuple[IRTypeName, IRKWargsType]] = None
self._node_index = 0
self.start_example(TOP_LABEL)

@@ -2292,12 +2327,16 @@ def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode
# (in fact, it is possible that giving up early here results in more time
# for useful shrinks to run).
if node.ir_type != ir_type:
# needed for try_shrinking_nodes to see what node was *attempted*
# to be drawn.
self.invalid_at = (ir_type, kwargs)
self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}")
@tybug (Member, Author) commented on May 1, 2024:

Now that we've moved to a more strongly typed representation in the ir, we do pretty badly on a class of problems: crossing typed boundaries. In particular, shrinking one_of(A, B) where A has a different type/kwargs than B. Before, we would shrink the integer for one_of, causing it to choose A instead of B. A interprets the bits differently than B, but we would get lucky most of the time and find a counterexample; and now that we're in A, we do more robust shrinking. In the ir we're much stricter and mark_invalid as soon as we have a misalignment, so the initial coincidental slip from B to A never happens.

One thing that works here is drawing randomly instead of giving up when we hit a misalignment, which I would say is basically equivalent to master behavior.

Concretely:

# condition avoids trivial counterexamples
assert minimal(st.integers(101, 200) | st.integers(0, 100), lambda n: n not in [101, 0]) == 102

fails on this branch and passes on master (I'll add this as a test case as well).
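
(`minimal` above is a helper from Hypothesis's own shrink-quality test suite. A roughly equivalent, self-contained reproduction using only the public API is sketched below; `find` returns the shrunken counterexample, and the expected value of 102 reflects the intended behaviour discussed here rather than a guarantee.)

from hypothesis import find, strategies as st

# condition avoids trivial counterexamples, as in the assert above
shrunk = find(
    st.integers(101, 200) | st.integers(0, 100),
    lambda n: n not in (101, 0),
)
print(shrunk)  # ideally 102: shrinking should cross from the second branch to the first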

@tybug (Member, Author) commented:

idle thoughts: I wonder if a short "randomly generate ir trees smaller than our current value" phase when the shrinker would otherwise exit as stuck would get us out of many of these coincidental slips. Possibly not, since if we didn't have to exploit some fixed structure while shrinking, we likely would have found this counterexample during the generation phase.

A contributor commented:

I haven’t touched Hypothesis in a few years, so take this with a grain of salt, but I vaguely recall that having the shrinker introduce randomness into the data stream has tended to be a bad idea.

Normally, the shrinker tends to make its input both shorter and simpler over time, despite the fact that (999 random bytes) is technically “smaller” than (1000 zero bytes).

But if the shrinker starts reintroducing randomness, this falls apart, and the shrinker will get obsessed with some high-entropy input because it happens to be shorter than the current low-entropy input.
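
(For context, the notion of "smaller" here is the shrinker's shortlex ordering on buffers: compare lengths first, then bytes lexicographically. A minimal illustration, using a sort key of the same shape as the shrinker's:)

import random

def sort_key(buffer: bytes):
    # shortlex: shorter buffers always sort before longer ones
    return (len(buffer), buffer)

low_entropy = bytes(1000)                                        # 1000 zero bytes
high_entropy = bytes(random.randrange(256) for _ in range(999))  # 999 random bytes

# the shorter-but-random buffer counts as "smaller", despite looking less shrunk
assert sort_key(high_entropy) < sort_key(low_entropy)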

A project member commented:

Seconded on this. Specifically the problem that tends to result is that the performance becomes terrible, because you end up repeatedly reintroducing lots of expensive-to-shrink regions in order to make minor size gains.

My guess is that you could possibly fix this with some sort of low-entropy generation mode (e.g. always generate one of the two smallest values for a given type) but I'm not sure how robust that would be and it potentially still has the same problem.
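
(A hypothetical sketch of what such a low-entropy mode could look like for integer draws; `low_entropy_integer` and its behaviour are illustrative assumptions, not part of this PR:)

import random

def low_entropy_integer(min_value: int, max_value: int, shrink_towards: int = 0) -> int:
    # clamp the shrink target into range, then only ever draw it or its successor,
    # so any value the shrinker invents is already near-minimal
    target = min(max(shrink_towards, min_value), max_value)
    candidates = [v for v in (target, target + 1) if min_value <= v <= max_value]
    return random.choice(candidates)

# e.g. for the integers(101, 200) branch above, this only ever yields 101 or 102
print(low_entropy_integer(101, 200))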

@tybug (Member, Author) commented:

That intuition makes sense to me. I think performance may still be ok if we only introduce randomness as a last-resort phase in the shrinker when we are otherwise at a fixpoint. iirc the shrinker used to have some dynamic enabling logic similar to this. Of course we're now complicating the shrink pass logic and have to justify the tradeoff in complexity as worthwhile. Alternatively low-entropy generation also seems reasonable.

It just feels quite bad to not be able to shrink st.integers() | st.floats() because we happened to hit an error in the floats region first, and we are seeing a few regressions in the shrink quality test suite due to this (though how much that maps to reality is unclear to me).


# if a node has different kwargs (and so is misaligned), but has a value
# that is allowed by the expected kwargs, then we can coerce this node
# into an aligned one by using its value. It's unclear how useful this is.
if not ir_value_permitted(node.value, node.ir_type, kwargs):
self.invalid_at = (ir_type, kwargs)
self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range")

return node
@@ -2328,6 +2367,7 @@ def as_result(self) -> Union[ConjectureResult, _Overrun]:
forced_indices=frozenset(self.forced_indices),
arg_slices=self.arg_slices,
slice_comments=self.slice_comments,
invalid_at=self.invalid_at,
)
assert self.__result is not None
self.blocks.transfer_ownership(self.__result)
56 changes: 50 additions & 6 deletions hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -186,6 +186,7 @@ def __init__(
# shrinking where we need to know about the structure of the
# executed test case.
self.__data_cache = LRUReusedCache(CACHE_SIZE)
self.__data_cache_ir = LRUReusedCache(CACHE_SIZE)

self.__pending_call_explanation = None
self._switch_to_hypothesis_provider = False
@@ -239,10 +240,52 @@ def __stoppable_test_function(self, data):
# correct engine.
raise

def ir_tree_to_data(self, ir_tree_nodes):
def _cache(self, data):
result = data.as_result()
# when we shrink, we try out of bounds things, which can lead to the same
# data.buffer having multiple outcomes. eg data.buffer=b'' is Status.OVERRUN
# in normal circumstances, but a data with
# ir_nodes=[integer -5 {min_value: 0, max_value: 10}] will also have
# data.buffer=b'' but will be Status.INVALID instead.
#
# I think this indicates that we should be mark_overrun internally in
# these cases instead? alternatively, we'll map Status.INVALID to Overrun
# when caching.
if data.invalid_at is not None:
assert data.status is Status.INVALID
result = Overrun
self.__data_cache[data.buffer] = result
self.__data_cache_ir[tuple(data.examples.ir_tree_nodes)] = result

def cached_test_function_ir(self, ir_tree_nodes):
key = tuple(ir_tree_nodes)
try:
return self.__data_cache_ir[key]
except KeyError:
pass

try:
trial_data = ConjectureData.for_ir_tree(ir_tree_nodes)
self.tree.simulate_test_function(trial_data)
except PreviouslyUnseenBehaviour:
pass
else:
trial_data.freeze()
try:
return self.__data_cache_ir[tuple(trial_data.examples.ir_tree_nodes)]
except KeyError:
pass

data = ConjectureData.for_ir_tree(ir_tree_nodes)
self.__stoppable_test_function(data)
return data
self.test_function(data)
self._cache(data)
# This covers slightly different cases than caching `data`. If the ir
# nodes (1) are not in our cache, (2) are not in our DataTree, and (3)
# running `data` through the test function concludes before consuming
# all ir nodes (eg because of misaligned ir nodes), the initial ir nodes
# will not be cached and we may continue to miss them.
self.__data_cache_ir[key] = data.as_result()
return data.as_result()

def test_function(self, data):
if self.__pending_call_explanation is not None:
@@ -274,7 +317,7 @@ def test_function(self, data):
),
}
self.stats_per_test_case.append(call_stats)
self.__data_cache[data.buffer] = data.as_result()
self._cache(data)

self.debug_data(data)

@@ -321,8 +364,9 @@

# drive the ir tree through the test function to convert it
# to a buffer
data = self.ir_tree_to_data(data.examples.ir_tree_nodes)
self.__data_cache[data.buffer] = data.as_result()
data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes)
self.__stoppable_test_function(data)
self._cache(data)

key = data.interesting_origin
changed = False