From dcaf502baab860f2c44f34cba4e1febbb1da645f Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Mon, 14 Oct 2024 13:24:44 -0700 Subject: [PATCH] Implement ebpf_check_constraints_at_label Signed-off-by: Alan Jowett --- src/asm_parse.hpp | 18 +++++++++ src/asm_syntax.hpp | 43 ++++++++++++++++++++ src/config.hpp | 3 ++ src/crab_verifier.cpp | 72 +++++++++++++++++++++++++++++++++ src/crab_verifier.hpp | 39 ++++++++++++++++++ src/test/ebpf_yaml.cpp | 40 ++++++++++++++++++- src/test/ebpf_yaml.hpp | 1 + test-data/add.yaml | 91 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 306 insertions(+), 1 deletion(-) diff --git a/src/asm_parse.hpp b/src/asm_parse.hpp index 212aca97f..34d6047df 100644 --- a/src/asm_parse.hpp +++ b/src/asm_parse.hpp @@ -3,7 +3,25 @@ #pragma once #include +#include #include "asm_syntax.hpp" +#include "crab/interval.hpp" +#include "crab/linear_constraint.hpp" Instruction parse_instruction(const std::string& line, const std::map& label_name_to_label); + +/*** + * Parse a set of string form linear constraints into a vector of crab::linear_constraint_t + * + * @param[in] constraints A set of string form linear constraints. + * @param[out] numeric_ranges A vector of numeric ranges that are extracted from the constraints. + * + * @return A vector of crab::linear_constraint_t + * Example of string constraints include: + * r0.type=number + * r0.uvalue=5 + * r0.svalue=5 + */ +std::vector parse_linear_constraints(const std::set& constraints, + std::vector& numeric_ranges); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 72c6b4e80..2290bd917 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -23,6 +23,49 @@ struct label_t { explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept : from(index), to(to), stack_frame_prefix(std::move(stack_frame_prefix)) {} + /** + * @brief Construct a new label t object from a string. + * + * @param[in] string_label The string representation of the label. + * @return None (constructor) + * + * @throw std::invalid_argument The label format is invalid. + * @throw std::out_of_range The label value causes numeric overflow. + * + * Format: [prefix/]from[:to] + * - prefix: Optional stack frame prefix + * - from: Source instruction number + * - to: Optional jump target (-1 means next instruction) + * + * Example labels: + * "2:-1" - a label which falls through to the next instruction. + * "2:5" - a label which jumps to instruction 5. + * "2:-1/5:-1" - a label which falls through to the next instruction, with a stack frame prefix denoting where the + * label was called. + */ + explicit label_t(std::string_view string_label) { + auto pos = string_label.find(STACK_FRAME_DELIMITER); + if (pos != std::string_view::npos) { + stack_frame_prefix = std::string(string_label.substr(0, pos)); + string_label = string_label.substr(pos + 1); + } + + pos = string_label.find(':'); + try { + if (pos != std::string_view::npos) { + from = std::stoi(std::string(string_label.substr(0, pos))); + to = std::stoi(std::string(string_label.substr(pos + 1))); + } else { + from = std::stoi(std::string(string_label)); + to = -1; + } + } catch (const std::invalid_argument& e) { + throw std::invalid_argument("Invalid label format: " + std::string(string_label)); + } catch (const std::out_of_range& e) { + throw std::out_of_range("Label value out of range: " + std::string(string_label)); + } + } + static label_t make_jump(const label_t& src_label, const label_t& target_label) { return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; } diff --git a/src/config.hpp b/src/config.hpp index b109a35c8..29cdd8d1a 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -37,6 +37,9 @@ struct ebpf_verifier_options_t { // Print the BTF types in JSON format. bool dump_btf_types_json = false; + + // Store pre-invariants for use in ebpf_check_constraints_at_label and ebpf_get_invariants_at_label. + bool store_pre_invariants = false; }; struct ebpf_verifier_stats_t { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index a5af14653..466c12035 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -16,6 +16,7 @@ #include "crab/fwd_analyzer.hpp" #include "crab_utils/lazy_allocator.hpp" +#include "asm_parse.hpp" #include "asm_syntax.hpp" #include "crab_verifier.hpp" #include "string_constraints.hpp" @@ -162,6 +163,14 @@ static checks_db get_analysis_report(std::ostream& s, const cfg_t& cfg, const cr return db; } +static thread_local std::optional saved_pre_invariants = std::nullopt; + +static void save_invariants_if_needed(const crab::invariant_table_t& pre_invariants) { + if (thread_local_options.store_pre_invariants) { + saved_pre_invariants = pre_invariants; + } +} + static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info info, const ebpf_verifier_options_t& options, const std::optional& prog = std::nullopt) { @@ -174,6 +183,7 @@ static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info // Get dictionaries of pre-invariants and post-invariants for each basic block. ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); + save_invariants_if_needed(pre_invariants); return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { // Convert verifier runtime_error exceptions to failure. @@ -220,6 +230,7 @@ std::tuple ebpf_analyze_program_for_test(std::ostream& o try { const cfg_t cfg = prepare_cfg(prog, info, options.cfg_opts); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_inv)); + save_invariants_if_needed(pre_invariants); const checks_db report = get_analysis_report(std::cerr, cfg, pre_invariants, post_invariants); print_report(os, report, prog, false); @@ -262,4 +273,65 @@ void ebpf_verifier_clear_thread_local_state() { global_program_info.clear(); crab::domains::clear_thread_local_state(); crab::domains::SplitDBM::clear_thread_local_state(); + saved_pre_invariants = std::nullopt; +} + +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label_string, + const std::set& constraints) { + try { + label_t label = label_t(label_string); + if (!saved_pre_invariants.has_value()) { + os << "No pre-invariants available\n"; + return false; + } + if (saved_pre_invariants.value().find(label) == saved_pre_invariants.value().end()) { + os << "No pre-invariants available for label " << label << "\n"; + return false; + } + ebpf_domain_t from_inv(saved_pre_invariants.value().at(label)); + auto concrete_domain = ebpf_domain_t::from_constraints(constraints, false); + + if (concrete_domain.is_bottom()) { + os << "The provided constraints are unsatisfiable and self-contradictory (concrete domain is bottom)\n"; + os << concrete_domain << "\n"; + return false; + } + if (from_inv.is_bottom()) { + os << "The abstract state is unreachable\n"; + os << from_inv << "\n"; + return false; + } + + if ((from_inv & concrete_domain).is_bottom()) { + os << "Concrete state does not match invariant\n"; + + // Print the concrete state + os << "--- Concrete state ---\n"; + os << concrete_domain << "\n"; + + os << "--- Abstract state ---\n"; + os << from_inv << "\n"; + + return false; + } + + return true; + } catch (std::exception& e) { + os << "Error occurred while checking constraints: " << e.what() << "\n"; + return false; + } +} + +std::set ebpf_get_invariants_at_label(const std::string& label) +{ + // If the label is malformed, throw an exception so the caller can handle it. + label_t l = label_t(label); + + if (!saved_pre_invariants.has_value()) { + return {}; + } + if (saved_pre_invariants.value().find(l) == saved_pre_invariants.value().end()) { + return {}; + } + return saved_pre_invariants.value().at(l).to_set().value(); } diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index 1da1d5e22..1bc88cf90 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -26,3 +26,42 @@ int create_map_crab(const EbpfMapType& map_type, uint32_t key_size, uint32_t val EbpfMapDescriptor* find_map_descriptor(int map_fd); void ebpf_verifier_clear_thread_local_state(); + +/** + * @brief Given a label and a set of concrete constraints, check if the concrete constraints match the abstract + * verifier constraints at the label. Requires the `store_pre_invariants` option to be set. + * + * Abstract constraints are computed by the verifier and stored if the `store_pre_invariants` option is set. + * These constraints represent the program state at a specific point in the control flow graph, + * as determined by the static analysis performed by the verifier. + * + * If the 'store_pre_invariants' option is not set, this function will always return false along with an error message. + * This is because the verifier did not store the abstract constraints at each label. + * + * For invalid labels, this function will return false along with an error message. + * + * @param[in,out] os Print output to this stream. + * @param[in] label The location in the CFG to check against. + * @param[in] constraints The concrete state to check. + * @retval true The state is valid. + * @retval false The state is invalid. + * + * Note: + * This can also return false if the label is not found in the CFG or if the label is malformed. + */ +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label, + const std::set& constraints); +/** + * @brief Get the invariants at a given label. Requires the `store_pre_invariants` option to be set. + * + * If the 'store_pre_invariants' option is not set, this function will return an empty set + * as no invariants were stored during verification. + * + * @param[in] label The label in the CFG where invariants should be retrieved + * @return The set of invariants at the given label. + * Each invariant represents a constraint on the program state at this point. + * Returns an empty set if no invariants are available. + * @throw std::invalid_argument The label format is invalid + * @throw std::out_of_range The label value causes numeric overflow + */ +std::set ebpf_get_invariants_at_label(const std::string& label); \ No newline at end of file diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index 017d17806..4a7ae996a 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -99,6 +99,7 @@ struct RawTestCase { vector>> raw_blocks; vector post; std::set messages; + std::map> invariants_to_check; }; static vector parse_block(const YAML::Node& block_node) { @@ -126,6 +127,21 @@ static std::set as_set_empty_default(const YAML::Node& optional_node) { return vector_to_set(optional_node.as>()); } +static std::map> parse_invariants_to_check(const YAML::Node& case_node) { + if (!case_node["invariants-to-check"].IsDefined() || case_node["invariants-to-check"].IsNull()) { + return {}; + } + std::map> res; + try { + for (const auto& node : case_node["invariants-to-check"]) { + res.emplace(node.first.as(), vector_to_set(node.second.as>())); + } + } catch (const YAML::Exception& e) { + throw std::runtime_error("Error parsing 'invariants-to-check': " + std::string(e.what())); + } + return res; +} + static RawTestCase parse_case(const YAML::Node& case_node) { return RawTestCase{ .test_case = case_node["test-case"].as(), @@ -134,6 +150,7 @@ static RawTestCase parse_case(const YAML::Node& case_node) { .raw_blocks = parse_code(case_node["code"]), .post = case_node["post"].as>(), .messages = as_set_empty_default(case_node["messages"]), + .invariants_to_check = parse_invariants_to_check(case_node), }; } @@ -213,7 +230,9 @@ static TestCase read_case(const RawTestCase& raw_case) { .assumed_pre_invariant = read_invariant(raw_case.pre), .instruction_seq = raw_cfg_to_instruction_seq(raw_case.raw_blocks), .expected_post_invariant = read_invariant(raw_case.post), - .expected_messages = raw_case.messages}; + .expected_messages = raw_case.messages, + .invariants_to_check = raw_case.invariants_to_check, + }; } static vector read_suite(const string& path) { @@ -254,6 +273,10 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { test_case.options.cfg_opts.simplify = false; } + if (!test_case.invariants_to_check.empty()) { + test_case.options.store_pre_invariants = true; + } + ebpf_context_descriptor_t context_descriptor{64, 0, 4, -1}; EbpfProgramType program_type = make_program_type(test_case.name, &context_descriptor); @@ -264,6 +287,21 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { ss, test_case.instruction_seq, test_case.assumed_pre_invariant, info, test_case.options); std::set actual_messages = extract_messages(ss.str()); + for (auto& [label, expected_invariant] : test_case.invariants_to_check) { + ss = {}; + try { + if (!ebpf_check_constraints_at_label(ss, label, expected_invariant)) { + // If debug is enabled, print the output of ebpf_check_constraints_at_label. + if (debug) { + std::cout << ss.str(); + } + actual_messages.insert(label + ": Concrete invariants do not match abstract invariants"); + } + } catch (const std::exception& e) { + actual_messages.insert(label + ": Exception occurred during invariant check"); + } + } + if (actual_last_invariant == test_case.expected_post_invariant && actual_messages == test_case.expected_messages) { return {}; } diff --git a/src/test/ebpf_yaml.hpp b/src/test/ebpf_yaml.hpp index b427c5114..3f5de7445 100644 --- a/src/test/ebpf_yaml.hpp +++ b/src/test/ebpf_yaml.hpp @@ -14,6 +14,7 @@ struct TestCase { InstructionSeq instruction_seq; string_invariant expected_post_invariant; std::set expected_messages; + std::map> invariants_to_check; ///< Map from label to expected invariants, used for validating program state at specific labels during test execution. }; void foreach_suite(const std::string& path, const std::function& f); diff --git a/test-data/add.yaml b/test-data/add.yaml index 7d29c6011..e6cc81ab7 100644 --- a/test-data/add.yaml +++ b/test-data/add.yaml @@ -175,3 +175,94 @@ post: - r7.packet_offset-r7.uvalue<=-7 - r7.svalue-r7.packet_offset<=11 - r7.uvalue-r7.packet_offset<=11 + +--- +test-case: check concrete state + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + r2 = 2 + r1 += r2 + r0 = r1 + +invariants-to-check: + 1: # After the first assignment, r1 should be 1. + - r1.type=number + - r1.svalue=1 + - r1.uvalue=1 + + 2: # After the second assignment, r2 should be 2. + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + + 3: # After the addition, r1 should be 3. + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + + -2: # After the last assignment, r0 should be 3. + - r0.type=number + - r0.svalue=3 + - r0.uvalue=3 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 +--- +test-case: check concrete state - negative + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + r2 = 2 + r1 += r2 + r0 = r1 + +invariants-to-check: + 1: # After the first assignment, r1 should be 1. + - r1.type=number + - r1.svalue=1 + - r1.uvalue=1 + + 2: # After the second assignment, r2 should be 2. + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + + 3: # After the addition, r1 should be 3. + # Using a value of 2 instead of 3 to ensure that the test fails (this is a negative test case). + - r1.type=number + - r1.svalue=2 + - r1.uvalue=2 + + -2: # After the last assignment, r0 should be 3. + - r0.type=number + - r0.svalue=3 + - r0.uvalue=3 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + +messages: + - "3: Concrete invariants do not match abstract invariants"