From e3efb4be8861848ddc80d2c90c6dadb765a59197 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Fri, 8 Nov 2024 19:09:58 -0800 Subject: [PATCH 1/2] Implement ebpf_check_constraints_at_label Signed-off-by: Alan Jowett --- src/asm_parse.cpp | 25 +++++++++++-- src/asm_parse.hpp | 21 ++++++++++- src/asm_syntax.hpp | 80 ++++++++++++++++++++++++++++++++++++++++++ src/config.hpp | 3 ++ src/crab_verifier.cpp | 71 +++++++++++++++++++++++++++++++++++++ src/crab_verifier.hpp | 39 ++++++++++++++++++++ src/test/ebpf_yaml.cpp | 56 ++++++++++++++++++++++++----- src/test/ebpf_yaml.hpp | 1 + test-data/add.yaml | 54 ++++++++++++++++++++++++++++ 9 files changed, 338 insertions(+), 12 deletions(-) diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 792d6ef43..2eac47383 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -54,6 +54,9 @@ using crab::number_t; #define DOT "[.]" #define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_" +// regex to match "require [assertion1, assertion2, ...]" +#define REQUIRE R"_(\s*require\s*\[\s*(.*?)\s*\]\s*)_" + static const std::map str_to_binop = { {"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL}, {"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND}, @@ -133,7 +136,8 @@ static Deref deref(const std::string& width, const std::string& basereg, const s }; } -Instruction parse_instruction(const std::string& line, const std::map& label_name_to_label) { +InstructionOrConstraintsSet parse_instruction(const std::string& line, + const std::map& label_name_to_label) { // treat ";" as a comment std::string text = line.substr(0, line.find(';')); const size_t end = text.find_last_not_of(' '); @@ -221,6 +225,17 @@ Instruction parse_instruction(const std::string& line, const std::map constraint_set; + std::vector numeric_ranges; + std::regex re(R"(\s*,\s*)"); + std::sregex_token_iterator first{constraints.begin(), constraints.end(), re, -1}, last; + for (; first != last; ++first) { + constraint_set.insert(*first); + } + return constraint_set; + } return Undefined{0}; } @@ -246,7 +261,13 @@ static InstructionSeq parse_program(std::istream& is) { if (line.empty()) { continue; } - Instruction ins = parse_instruction(line, {}); + auto ins_or_constraints = parse_instruction(line, {}); + + if (std::holds_alternative(ins_or_constraints)) { + continue; + } + + Instruction ins = convert_to_original(ins_or_constraints).value_or(Undefined{0}); if (std::holds_alternative(ins)) { continue; } diff --git a/src/asm_parse.hpp b/src/asm_parse.hpp index 212aca97f..8dcaba081 100644 --- a/src/asm_parse.hpp +++ b/src/asm_parse.hpp @@ -3,7 +3,26 @@ #pragma once #include +#include #include "asm_syntax.hpp" +#include "crab/interval.hpp" +#include "crab/linear_constraint.hpp" -Instruction parse_instruction(const std::string& line, const std::map& label_name_to_label); +InstructionOrConstraintsSet parse_instruction(const std::string& line, + const std::map& label_name_to_label); + +/*** + * Parse a set of string form linear constraints into a vector of crab::linear_constraint_t + * + * @param[in] constraints A set of string form linear constraints. + * @param[out] numeric_ranges A vector of numeric ranges that are extracted from the constraints. + * + * @return A vector of crab::linear_constraint_t + * Example of string constraints include: + * r0.type=number + * r0.uvalue=5 + * r0.svalue=5 + */ +std::vector parse_linear_constraints(const std::set& constraints, + std::vector& numeric_ranges); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index a008fd73a..4274640eb 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -23,6 +23,49 @@ struct label_t { explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept : from(index), to(to), stack_frame_prefix(std::move(stack_frame_prefix)) {} + /** + * @brief Construct a new label t object from a string. + * + * @param[in] string_label The string representation of the label. + * @return None (constructor) + * + * @throw std::invalid_argument The label format is invalid. + * @throw std::out_of_range The label value causes numeric overflow. + * + * Format: [prefix/]from[:to] + * - prefix: Optional stack frame prefix + * - from: Source instruction number + * - to: Optional jump target (-1 means next instruction) + * + * Example labels: + * "2:-1" - a label which falls through to the next instruction. + * "2:5" - a label which jumps to instruction 5. + * "2:-1/5:-1" - a label which falls through to the next instruction, with a stack frame prefix denoting where the + * label was called. + */ + explicit label_t(std::string_view string_label) { + auto pos = string_label.find(STACK_FRAME_DELIMITER); + if (pos != std::string_view::npos) { + stack_frame_prefix = std::string(string_label.substr(0, pos)); + string_label = string_label.substr(pos + 1); + } + + pos = string_label.find(':'); + try { + if (pos != std::string_view::npos) { + from = std::stoi(std::string(string_label.substr(0, pos))); + to = std::stoi(std::string(string_label.substr(pos + 1))); + } else { + from = std::stoi(std::string(string_label)); + to = -1; + } + } catch (const std::invalid_argument& e) { + throw std::invalid_argument("Invalid label format: " + std::string(string_label)); + } catch (const std::out_of_range& e) { + throw std::out_of_range("Label value out of range: " + std::string(string_label)); + } + } + static label_t make_jump(const label_t& src_label, const label_t& target_label) { return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; } @@ -315,11 +358,48 @@ struct IncrementLoopCounter { bool operator==(const IncrementLoopCounter&) const = default; }; +// Helper metafunction to append a type to a variant +template +struct append_to_variant; + +template +struct append_to_variant, NewType> { + using type = std::variant; +}; + +// Helper metafunction to check if a type is in a variant +template +struct is_type_in_variant; + +template +struct is_type_in_variant, T> : std::disjunction...> {}; + +// Function to convert ExtendedVariant to OriginalVariant if it doesn't contain the new type +template +std::optional convert_to_original(const ExtendedVariant& extendedVariant) { + std::optional result; + + std::visit( + [&result](auto&& arg) { + using T = std::decay_t; + if constexpr (is_type_in_variant::value) { + result = arg; + } + }, + extendedVariant); + return result; +} + using Instruction = std::variant; +using ConstraintsSet = std::set; +using InstructionOrConstraintsSet = append_to_variant::type; using LabeledInstruction = std::tuple>; +using LabeledConstraints = std::tuple; using InstructionSeq = std::vector; +using ConstraintsSeq = std::vector; +using InstructionAndConstraintsSeq = std::tuple; /// Condition check whether something is a valid size. struct ValidSize { diff --git a/src/config.hpp b/src/config.hpp index b109a35c8..29cdd8d1a 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -37,6 +37,9 @@ struct ebpf_verifier_options_t { // Print the BTF types in JSON format. bool dump_btf_types_json = false; + + // Store pre-invariants for use in ebpf_check_constraints_at_label and ebpf_get_invariants_at_label. + bool store_pre_invariants = false; }; struct ebpf_verifier_stats_t { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index a5af14653..fcc0755ec 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -16,6 +16,7 @@ #include "crab/fwd_analyzer.hpp" #include "crab_utils/lazy_allocator.hpp" +#include "asm_parse.hpp" #include "asm_syntax.hpp" #include "crab_verifier.hpp" #include "string_constraints.hpp" @@ -162,6 +163,14 @@ static checks_db get_analysis_report(std::ostream& s, const cfg_t& cfg, const cr return db; } +static thread_local std::optional saved_pre_invariants = std::nullopt; + +static void save_invariants_if_needed(const crab::invariant_table_t& pre_invariants) { + if (thread_local_options.store_pre_invariants) { + saved_pre_invariants = pre_invariants; + } +} + static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info info, const ebpf_verifier_options_t& options, const std::optional& prog = std::nullopt) { @@ -174,6 +183,7 @@ static checks_db get_ebpf_report(std::ostream& s, const cfg_t& cfg, program_info // Get dictionaries of pre-invariants and post-invariants for each basic block. ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); + save_invariants_if_needed(pre_invariants); return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { // Convert verifier runtime_error exceptions to failure. @@ -220,6 +230,7 @@ std::tuple ebpf_analyze_program_for_test(std::ostream& o try { const cfg_t cfg = prepare_cfg(prog, info, options.cfg_opts); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_inv)); + save_invariants_if_needed(pre_invariants); const checks_db report = get_analysis_report(std::cerr, cfg, pre_invariants, post_invariants); print_report(os, report, prog, false); @@ -262,4 +273,64 @@ void ebpf_verifier_clear_thread_local_state() { global_program_info.clear(); crab::domains::clear_thread_local_state(); crab::domains::SplitDBM::clear_thread_local_state(); + saved_pre_invariants = std::nullopt; +} + +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label_string, + const std::set& constraints) { + try { + label_t label = label_t(label_string); + if (!saved_pre_invariants.has_value()) { + os << "No pre-invariants available\n"; + return false; + } + if (saved_pre_invariants.value().find(label) == saved_pre_invariants.value().end()) { + os << "No pre-invariants available for label " << label << "\n"; + return false; + } + ebpf_domain_t from_inv(saved_pre_invariants.value().at(label)); + auto concrete_domain = ebpf_domain_t::from_constraints(constraints, false); + + if (concrete_domain.is_bottom()) { + os << "The provided constraints are unsatisfiable and self-contradictory (concrete domain is bottom)\n"; + os << concrete_domain << "\n"; + return false; + } + if (from_inv.is_bottom()) { + os << "The abstract state is unreachable\n"; + os << from_inv << "\n"; + return false; + } + + if ((from_inv & concrete_domain).is_bottom()) { + os << "Concrete state does not match invariant\n"; + + // Print the concrete state + os << "--- Concrete state ---\n"; + os << concrete_domain << "\n"; + + os << "--- Abstract state ---\n"; + os << from_inv << "\n"; + + return false; + } + + return true; + } catch (std::exception& e) { + os << "Error occurred while checking constraints: " << e.what() << "\n"; + return false; + } +} + +std::set ebpf_get_invariants_at_label(const std::string& label) { + // If the label is malformed, throw an exception so the caller can handle it. + label_t l = label_t(label); + + if (!saved_pre_invariants.has_value()) { + return {}; + } + if (saved_pre_invariants.value().find(l) == saved_pre_invariants.value().end()) { + return {}; + } + return saved_pre_invariants.value().at(l).to_set().value(); } diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index 1da1d5e22..bdca6b03d 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -26,3 +26,42 @@ int create_map_crab(const EbpfMapType& map_type, uint32_t key_size, uint32_t val EbpfMapDescriptor* find_map_descriptor(int map_fd); void ebpf_verifier_clear_thread_local_state(); + +/** + * @brief Given a label and a set of concrete constraints, check if the concrete constraints match the abstract + * verifier constraints at the label. Requires the `store_pre_invariants` option to be set. + * + * Abstract constraints are computed by the verifier and stored if the `store_pre_invariants` option is set. + * These constraints represent the program state at a specific point in the control flow graph, + * as determined by the static analysis performed by the verifier. + * + * If the 'store_pre_invariants' option is not set, this function will always return false along with an error message. + * This is because the verifier did not store the abstract constraints at each label. + * + * For invalid labels, this function will return false along with an error message. + * + * @param[in,out] os Print output to this stream. + * @param[in] label The location in the CFG to check against. + * @param[in] constraints The concrete state to check. + * @retval true The state is valid. + * @retval false The state is invalid. + * + * Note: + * This can also return false if the label is not found in the CFG or if the label is malformed. + */ +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label, + const std::set& constraints); +/** + * @brief Get the invariants at a given label. Requires the `store_pre_invariants` option to be set. + * + * If the 'store_pre_invariants' option is not set, this function will return an empty set + * as no invariants were stored during verification. + * + * @param[in] label The label in the CFG where invariants should be retrieved + * @return The set of invariants at the given label. + * Each invariant represents a constraint on the program state at this point. + * Returns an empty set if no invariants are available. + * @throw std::invalid_argument The label format is invalid + * @throw std::out_of_range The label value causes numeric overflow + */ +std::set ebpf_get_invariants_at_label(const std::string& label); diff --git a/src/test/ebpf_yaml.cpp b/src/test/ebpf_yaml.cpp index 017d17806..aeecf3b39 100644 --- a/src/test/ebpf_yaml.cpp +++ b/src/test/ebpf_yaml.cpp @@ -137,7 +137,8 @@ static RawTestCase parse_case(const YAML::Node& case_node) { }; } -static InstructionSeq raw_cfg_to_instruction_seq(const vector>>& raw_blocks) { +static InstructionAndConstraintsSeq +raw_cfg_to_instruction_seq(const vector>>& raw_blocks) { std::map label_name_to_label; int label_index = 0; @@ -148,11 +149,21 @@ static InstructionSeq raw_cfg_to_instruction_seq(const vector(ins_or_constraints)) { + constraints.emplace_back(label_index, std::get(ins_or_constraints)); + continue; + } + auto ins_opt = convert_to_original(ins_or_constraints); + if (!ins_opt) { + continue; + } + const Instruction& ins = *ins_opt; if (std::holds_alternative(ins)) { std::cout << "text:" << line << "; ins: " << ins << "\n"; } @@ -164,7 +175,7 @@ static InstructionSeq raw_cfg_to_instruction_seq(const vector& raw_options) { @@ -208,12 +219,16 @@ static ebpf_verifier_options_t raw_options_to_options(const std::set& ra } static TestCase read_case(const RawTestCase& raw_case) { - return TestCase{.name = raw_case.test_case, - .options = raw_options_to_options(raw_case.options), - .assumed_pre_invariant = read_invariant(raw_case.pre), - .instruction_seq = raw_cfg_to_instruction_seq(raw_case.raw_blocks), - .expected_post_invariant = read_invariant(raw_case.post), - .expected_messages = raw_case.messages}; + auto instruction_seq_and_constraints = raw_cfg_to_instruction_seq(raw_case.raw_blocks); + return TestCase{ + .name = raw_case.test_case, + .options = raw_options_to_options(raw_case.options), + .assumed_pre_invariant = read_invariant(raw_case.pre), + .instruction_seq = std::get<0>(instruction_seq_and_constraints), + .expected_post_invariant = read_invariant(raw_case.post), + .expected_messages = raw_case.messages, + .invariants_to_check = std::get<1>(instruction_seq_and_constraints), + }; } static vector read_suite(const string& path) { @@ -254,6 +269,10 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { test_case.options.cfg_opts.simplify = false; } + if (!test_case.invariants_to_check.empty()) { + test_case.options.store_pre_invariants = true; + } + ebpf_context_descriptor_t context_descriptor{64, 0, 4, -1}; EbpfProgramType program_type = make_program_type(test_case.name, &context_descriptor); @@ -264,6 +283,25 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { ss, test_case.instruction_seq, test_case.assumed_pre_invariant, info, test_case.options); std::set actual_messages = extract_messages(ss.str()); + for (auto& [label, expected_invariant] : test_case.invariants_to_check) { + ss = {}; + ss << label; + std::string string_label = ss.str(); + ss = {}; + + try { + if (!ebpf_check_constraints_at_label(ss, string_label, expected_invariant)) { + // If debug is enabled, print the output of ebpf_check_constraints_at_label. + if (debug) { + std::cout << ss.str(); + } + actual_messages.insert(string_label + ": Concrete invariants do not match abstract invariants"); + } + } catch (const std::exception& e) { + actual_messages.insert(string_label + ": Exception occurred during invariant check"); + } + } + if (actual_last_invariant == test_case.expected_post_invariant && actual_messages == test_case.expected_messages) { return {}; } diff --git a/src/test/ebpf_yaml.hpp b/src/test/ebpf_yaml.hpp index b427c5114..f2f78fb75 100644 --- a/src/test/ebpf_yaml.hpp +++ b/src/test/ebpf_yaml.hpp @@ -14,6 +14,7 @@ struct TestCase { InstructionSeq instruction_seq; string_invariant expected_post_invariant; std::set expected_messages; + ConstraintsSeq invariants_to_check; }; void foreach_suite(const std::string& path, const std::function& f); diff --git a/test-data/add.yaml b/test-data/add.yaml index 7d29c6011..5bbb858af 100644 --- a/test-data/add.yaml +++ b/test-data/add.yaml @@ -175,3 +175,57 @@ post: - r7.packet_offset-r7.uvalue<=-7 - r7.svalue-r7.packet_offset<=11 - r7.uvalue-r7.packet_offset<=11 + +--- +test-case: check concrete state + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + require [r1.type=number, r1.svalue=1, r1.uvalue=r1.svalue] + r2 = 2 + require [r2.type=number, r2.svalue=2, r2.uvalue=r2.svalue] + r1 += r2 + require [r1.type=number, r1.svalue=3, r1.uvalue=r1.svalue] + r0 = r1 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 +--- +test-case: check concrete state - negative + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + require [r1.type=number, r1.svalue=1, r1.uvalue=r1.svalue] + r2 = 2 + require [r2.type=number, r2.svalue=2, r2.uvalue=r2.svalue] + r1 += r2 + require [r1.type=number, r1.svalue=2, r1.uvalue=r1.svalue] + r0 = r1 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + +messages: + - "3: Concrete invariants do not match abstract invariants" From e92408ce0fc8c26a6c8b2e1bc0ef59ea9f8d3072 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sat, 9 Nov 2024 14:18:17 -0800 Subject: [PATCH 2/2] PR feedback Signed-off-by: Alan Jowett --- src/asm_parse.cpp | 1 - test-data/add.yaml | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 2eac47383..e8de828c9 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -228,7 +228,6 @@ InstructionOrConstraintsSet parse_instruction(const std::string& line, if (regex_match(text, m, regex(REQUIRE))) { std::string constraints = m[1]; std::set constraint_set; - std::vector numeric_ranges; std::regex re(R"(\s*,\s*)"); std::sregex_token_iterator first{constraints.begin(), constraints.end(), re, -1}, last; for (; first != last; ++first) { diff --git a/test-data/add.yaml b/test-data/add.yaml index 5bbb858af..bdb920061 100644 --- a/test-data/add.yaml +++ b/test-data/add.yaml @@ -213,6 +213,7 @@ code: r2 = 2 require [r2.type=number, r2.svalue=2, r2.uvalue=r2.svalue] r1 += r2 + # Using 2 instead of 3 to validate that verification fails at an intermediate step. require [r1.type=number, r1.svalue=2, r1.uvalue=r1.svalue] r0 = r1 @@ -228,4 +229,4 @@ post: - r2.uvalue=2 messages: - - "3: Concrete invariants do not match abstract invariants" + - "4: Concrete invariants do not match abstract invariants"