From dd61274930ec0cd17711fab52d2bc9ad3e9053de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Thu, 19 Aug 2021 11:40:00 -0700 Subject: [PATCH 1/4] Lint against RTL unicode codepoints in literals and comments Address CVE-2021-42574. --- Cargo.lock | 1 + compiler/rustc_errors/src/emitter.rs | 20 +- compiler/rustc_lint/src/context.rs | 39 +++- .../src/hidden_unicode_codepoints.rs | 161 +++++++++++++++ compiler/rustc_lint/src/lib.rs | 3 + compiler/rustc_lint_defs/src/builtin.rs | 28 +++ compiler/rustc_lint_defs/src/lib.rs | 1 + compiler/rustc_parse/Cargo.toml | 1 + compiler/rustc_parse/src/lexer/mod.rs | 40 +++- .../src/lexer/unescape_error_reporting.rs | 20 +- .../ui/parser/unicode-control-codepoints.rs | 39 ++++ .../parser/unicode-control-codepoints.stderr | 192 ++++++++++++++++++ 12 files changed, 535 insertions(+), 10 deletions(-) create mode 100644 compiler/rustc_lint/src/hidden_unicode_codepoints.rs create mode 100644 src/test/ui/parser/unicode-control-codepoints.rs create mode 100644 src/test/ui/parser/unicode-control-codepoints.stderr diff --git a/Cargo.lock b/Cargo.lock index 4f1a5ca35fa00..f7080a9102001 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4101,6 +4101,7 @@ dependencies = [ "rustc_span", "tracing", "unicode-normalization", + "unicode-width", ] [[package]] diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 29f352ae58559..272e367dd38c8 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2054,8 +2054,26 @@ fn num_decimal_digits(num: usize) -> usize { MAX_DIGITS } +// We replace some characters so the CLI output is always consistent and underlines aligned. +const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ + ('\t', " "), // We do our own tab replacement + ('\u{202A}', ""), // The following unicode text flow control characters are inconsistently + ('\u{202B}', ""), // supported accross CLIs and can cause confusion due to the bytes on disk + ('\u{202D}', ""), // not corresponding to the visible source code, so we replace them always. + ('\u{202E}', ""), + ('\u{2066}', ""), + ('\u{2067}', ""), + ('\u{2068}', ""), + ('\u{202C}', ""), + ('\u{2069}', ""), +]; + fn replace_tabs(str: &str) -> String { - str.replace('\t', " ") + let mut s = str.to_string(); + for (c, replacement) in OUTPUT_REPLACEMENTS { + s = s.replace(*c, replacement); + } + s } fn draw_col_separator(buffer: &mut StyledBuffer, line: usize, col: usize) { diff --git a/compiler/rustc_lint/src/context.rs b/compiler/rustc_lint/src/context.rs index 7dbc3d60439c4..30ae664917df6 100644 --- a/compiler/rustc_lint/src/context.rs +++ b/compiler/rustc_lint/src/context.rs @@ -16,6 +16,7 @@ use self::TargetLint::*; +use crate::hidden_unicode_codepoints::UNICODE_TEXT_FLOW_CHARS; use crate::levels::{is_known_lint_tool, LintLevelsBuilder}; use crate::passes::{EarlyLintPassObject, LateLintPassObject}; use rustc_ast as ast; @@ -40,7 +41,7 @@ use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintI use rustc_session::Session; use rustc_session::SessionLintStore; use rustc_span::lev_distance::find_best_match_for_name; -use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP}; +use rustc_span::{symbol::Symbol, BytePos, MultiSpan, Span, DUMMY_SP}; use rustc_target::abi::{self, LayoutOf}; use tracing::debug; @@ -612,6 +613,42 @@ pub trait LintContext: Sized { // Now, set up surrounding context. let sess = self.sess(); match diagnostic { + BuiltinLintDiagnostics::UnicodeTextFlow(span, content) => { + let spans: Vec<_> = content + .char_indices() + .filter_map(|(i, c)| { + UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(2 + i as u32); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + let (an, s) = match spans.len() { + 1 => ("an ", ""), + _ => ("", "s"), + }; + db.span_label(span, &format!( + "this comment contains {}invisible unicode text flow control codepoint{}", + an, + s, + )); + for (c, span) in &spans { + db.span_label(*span, format!("{:?}", c)); + } + db.note( + "these kind of unicode codepoints change the way text flows on \ + applications that support them, but can cause confusion because they \ + change the order of characters on the screen", + ); + if !spans.is_empty() { + db.multipart_suggestion_with_style( + "if their presence wasn't intentional, you can remove them", + spans.into_iter().map(|(_, span)| (span, "".to_string())).collect(), + Applicability::MachineApplicable, + SuggestionStyle::HideCodeAlways, + ); + } + }, BuiltinLintDiagnostics::Normal => (), BuiltinLintDiagnostics::BareTraitObject(span, is_global) => { let (sugg, app) = match sess.source_map().span_to_snippet(span) { diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs new file mode 100644 index 0000000000000..1bcdcb806fc43 --- /dev/null +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -0,0 +1,161 @@ +use crate::{EarlyContext, EarlyLintPass, LintContext}; +use rustc_ast as ast; +use rustc_errors::{Applicability, SuggestionStyle}; +use rustc_span::{BytePos, Span, Symbol}; + +declare_lint! { + /// The `text_direction_codepoint_in_literal` lint detects Unicode codepoints that change the + /// visual representation of text on screen in a way that does not correspond to their on + /// memory representation. + /// + /// ### Explanation + /// + /// The unicode characters `\u{202A}`, `\u{202B}`, `\u{202D}`, `\u{202E}`, `\u{2066}`, + /// `\u{2067}`, `\u{2068}`, `\u{202C}` and `\u{2069}` make the flow of text on screen change + /// its direction on software that supports these codepoints. This makes the text "abc" display + /// as "cba" on screen. By leveraging software that supports these, people can write specially + /// crafted literals that make the surrounding code seem like it's performing one action, when + /// in reality it is performing another. Because of this, we proactively lint against their + /// presence to avoid surprises. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// #![deny(text_direction_codepoint_in_literal)] + /// fn main() { + /// println!("{:?}", '‮'); + /// } + /// ``` + /// + /// {{produces}} + /// + pub TEXT_DIRECTION_CODEPOINT_IN_LITERAL, + Deny, + "detect special Unicode codepoints that affect the visual representation of text on screen, \ + changing the direction in which text flows", +} + +declare_lint_pass!(HiddenUnicodeCodepoints => [TEXT_DIRECTION_CODEPOINT_IN_LITERAL]); + +crate const UNICODE_TEXT_FLOW_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{202C}', + '\u{2069}', +]; + +impl HiddenUnicodeCodepoints { + fn lint_text_direction_codepoint( + &self, + cx: &EarlyContext<'_>, + text: Symbol, + span: Span, + padding: u32, + point_at_inner_spans: bool, + label: &str, + ) { + // Obtain the `Span`s for each of the forbidden chars. + let spans: Vec<_> = text + .as_str() + .char_indices() + .filter_map(|(i, c)| { + UNICODE_TEXT_FLOW_CHARS.contains(&c).then(|| { + let lo = span.lo() + BytePos(i as u32 + padding); + (c, span.with_lo(lo).with_hi(lo + BytePos(c.len_utf8() as u32))) + }) + }) + .collect(); + + cx.struct_span_lint(TEXT_DIRECTION_CODEPOINT_IN_LITERAL, span, |lint| { + let mut err = lint.build(&format!( + "unicode codepoint changing visible direction of text present in {}", + label + )); + let (an, s) = match spans.len() { + 1 => ("an ", ""), + _ => ("", "s"), + }; + err.span_label( + span, + &format!( + "this {} contains {}invisible unicode text flow control codepoint{}", + label, an, s, + ), + ); + if point_at_inner_spans { + for (c, span) in &spans { + err.span_label(*span, format!("{:?}", c)); + } + } + err.note( + "these kind of unicode codepoints change the way text flows on applications that \ + support them, but can cause confusion because they change the order of \ + characters on the screen", + ); + if point_at_inner_spans && !spans.is_empty() { + err.multipart_suggestion_with_style( + "if their presence wasn't intentional, you can remove them", + spans.iter().map(|(_, span)| (*span, "".to_string())).collect(), + Applicability::MachineApplicable, + SuggestionStyle::HideCodeAlways, + ); + err.multipart_suggestion( + "if you want to keep them but make them visible in your source code, you can \ + escape them", + spans + .into_iter() + .map(|(c, span)| { + let c = format!("{:?}", c); + (span, c[1..c.len() - 1].to_string()) + }) + .collect(), + Applicability::MachineApplicable, + ); + } else { + // FIXME: in other suggestions we've reversed the inner spans of doc comments. We + // should do the same here to provide the same good suggestions as we do for + // literals above. + err.note("if their presence wasn't intentional, you can remove them"); + err.note(&format!( + "if you want to keep them but make them visible in your source code, you can \ + escape them: {}", + spans + .into_iter() + .map(|(c, _)| { format!("{:?}", c) }) + .collect::>() + .join(", "), + )); + } + err.emit(); + }); + } +} +impl EarlyLintPass for HiddenUnicodeCodepoints { + fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { + if let ast::AttrKind::DocComment(_, comment) = attr.kind { + if comment.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { + self.lint_text_direction_codepoint(cx, comment, attr.span, 0, false, "doc comment"); + } + } + } + + fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { + // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` + let (text, span, padding) = match &expr.kind { + ast::ExprKind::Lit(ast::Lit { token, kind, span }) => { + let text = token.symbol; + if !text.as_str().contains(UNICODE_TEXT_FLOW_CHARS) { + return; + } + let padding = match kind { + // account for `"` or `'` + ast::LitKind::Str(_, ast::StrStyle::Cooked) | ast::LitKind::Char(_) => 1, + // account for `r###"` + ast::LitKind::Str(_, ast::StrStyle::Raw(val)) => *val as u32 + 2, + _ => return, + }; + (text, span, padding) + } + _ => return, + }; + self.lint_text_direction_codepoint(cx, text, *span, padding, true, "literal"); + } +} diff --git a/compiler/rustc_lint/src/lib.rs b/compiler/rustc_lint/src/lib.rs index ef4bda666ba06..b65fa419fadd7 100644 --- a/compiler/rustc_lint/src/lib.rs +++ b/compiler/rustc_lint/src/lib.rs @@ -48,6 +48,7 @@ mod array_into_iter; pub mod builtin; mod context; mod early; +pub mod hidden_unicode_codepoints; mod internal; mod late; mod levels; @@ -77,6 +78,7 @@ use rustc_span::Span; use array_into_iter::ArrayIntoIter; use builtin::*; +use hidden_unicode_codepoints::*; use internal::*; use methods::*; use non_ascii_idents::*; @@ -128,6 +130,7 @@ macro_rules! early_lint_passes { DeprecatedAttr: DeprecatedAttr::new(), WhileTrue: WhileTrue, NonAsciiIdents: NonAsciiIdents, + HiddenUnicodeCodepoints: HiddenUnicodeCodepoints, IncompleteFeatures: IncompleteFeatures, RedundantSemicolons: RedundantSemicolons, UnusedDocComment: UnusedDocComment, diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index 8fb678e2d20fb..e9cd30a058f5c 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -3416,3 +3416,31 @@ declare_lint! { Warn, "`break` expression with label and unlabeled loop as value expression" } + +declare_lint! { + /// The `text_direction_codepoint_in_comment` lint detects Unicode codepoints in comments that + /// change the visual representation of text on screen in a way that does not correspond to + /// their on memory representation. + /// + /// ### Example + /// + /// ```rust,compile_fail + /// #![deny(text_direction_codepoint_in_comment)] + /// fn main() { + /// println!("{:?}"); // '‮'); + /// } + /// ``` + /// + /// {{produces}} + /// + /// ### Explanation + /// + /// Unicode allows changing the visual flow of text on screen in order to support scripts that + /// are written right-to-left, but a specially crafted comment can make code that will be + /// compiled appear to be part of a comment, depending on the software used to read the code. + /// To avoid potential problems or confusion, such as in CVE-2021-42574, by default we deny + /// their use. + pub TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + Deny, + "invisible directionality-changing codepoints in comment" +} diff --git a/compiler/rustc_lint_defs/src/lib.rs b/compiler/rustc_lint_defs/src/lib.rs index f89d531b5ef5c..feac2a7cfa48a 100644 --- a/compiler/rustc_lint_defs/src/lib.rs +++ b/compiler/rustc_lint_defs/src/lib.rs @@ -306,6 +306,7 @@ pub enum BuiltinLintDiagnostics { TrailingMacro(bool, Ident), BreakWithLabelAndLoop(Span), NamedAsmLabel(String), + UnicodeTextFlow(Span, String), } /// Lints that are buffered up early on in the `Session` before the diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index 1ca1a92252ea4..b67195e6e2206 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -18,3 +18,4 @@ rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } rustc_ast = { path = "../rustc_ast" } unicode-normalization = "0.1.11" +unicode-width = "0.1.4" diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 1e65cc27154a8..8e90f73b44edd 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -4,7 +4,9 @@ use rustc_ast::tokenstream::{Spacing, TokenStream}; use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult}; use rustc_lexer::unescape::{self, Mode}; use rustc_lexer::{Base, DocStyle, RawStrError}; -use rustc_session::lint::builtin::RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX; +use rustc_session::lint::builtin::{ + TEXT_DIRECTION_CODEPOINT_IN_COMMENT, RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, +}; use rustc_session::lint::BuiltinLintDiagnostics; use rustc_session::parse::ParseSess; use rustc_span::symbol::{sym, Symbol}; @@ -129,6 +131,28 @@ impl<'a> StringReader<'a> { .struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c))) } + /// Detect usages of Unicode codepoints changing the direction of the text on screen and loudly + /// complain about it. + fn lint_unicode_text_flow(&self, start: BytePos) { + // Opening delimiter of the length 2 is not included into the comment text. + let content_start = start + BytePos(2); + let content = self.str_from(content_start); + let span = self.mk_sp(start, self.pos); + const UNICODE_TEXT_FLOW_CHARS: &[char] = &[ + '\u{202A}', '\u{202B}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', + '\u{202C}', '\u{2069}', + ]; + if content.contains(UNICODE_TEXT_FLOW_CHARS) { + self.sess.buffer_lint_with_diagnostic( + &TEXT_DIRECTION_CODEPOINT_IN_COMMENT, + span, + ast::CRATE_NODE_ID, + "unicode codepoint changing visible direction of text present in comment", + BuiltinLintDiagnostics::UnicodeTextFlow(span, content.to_string()), + ); + } + } + /// Turns simple `rustc_lexer::TokenKind` enum into a rich /// `rustc_ast::TokenKind`. This turns strings into interned /// symbols and runs additional validation. @@ -136,7 +160,12 @@ impl<'a> StringReader<'a> { Some(match token { rustc_lexer::TokenKind::LineComment { doc_style } => { // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 is not included into the symbol. let content_start = start + BytePos(3); @@ -158,7 +187,12 @@ impl<'a> StringReader<'a> { } // Skip non-doc comments - let doc_style = doc_style?; + let doc_style = if let Some(doc_style) = doc_style { + doc_style + } else { + self.lint_unicode_text_flow(start); + return None; + }; // Opening delimiter of the length 3 and closing delimiter of the length 2 // are not included into the symbol. diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index aa6b424ce2b57..e26d094a0e28e 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -154,12 +154,17 @@ pub(crate) fn emit_unescape_error( assert!(mode.is_bytes()); let (c, span) = last_char(); let mut err = handler.struct_span_err(span, "non-ASCII character in byte constant"); - err.span_label(span, "byte constant must be ASCII"); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; + err.span_label(span, &format!("byte constant must be ASCII{}", postfix)); if (c as u32) <= 0xFF { err.span_suggestion( span, &format!( - "if you meant to use the unicode code point for '{}', use a \\xHH escape", + "if you meant to use the unicode code point for {:?}, use a \\xHH escape", c ), format!("\\x{:X}", c as u32), @@ -173,7 +178,7 @@ pub(crate) fn emit_unescape_error( err.span_suggestion( span, &format!( - "if you meant to use the UTF-8 encoding of '{}', use \\xHH escapes", + "if you meant to use the UTF-8 encoding of {:?}, use \\xHH escapes", c ), utf8.as_bytes() @@ -187,10 +192,15 @@ pub(crate) fn emit_unescape_error( } EscapeError::NonAsciiCharInByteString => { assert!(mode.is_bytes()); - let (_c, span) = last_char(); + let (c, span) = last_char(); + let postfix = if unicode_width::UnicodeWidthChar::width(c).unwrap_or(1) == 0 { + format!(" but is {:?}", c) + } else { + String::new() + }; handler .struct_span_err(span, "raw byte string must be ASCII") - .span_label(span, "must be ASCII") + .span_label(span, &format!("must be ASCII{}", postfix)) .emit(); } EscapeError::OutOfRangeHexEscape => { diff --git a/src/test/ui/parser/unicode-control-codepoints.rs b/src/test/ui/parser/unicode-control-codepoints.rs new file mode 100644 index 0000000000000..5af0b585a1275 --- /dev/null +++ b/src/test/ui/parser/unicode-control-codepoints.rs @@ -0,0 +1,39 @@ +fn main() { + // if access_level != "us‫e‪r" { // Check if admin + //~^ ERROR unicode codepoint changing visible direction of text present in comment + println!("us\u{202B}e\u{202A}r"); + println!("{:?}", r#"us\u{202B}e\u{202A}r"#); + println!("{:?}", b"us\u{202B}e\u{202A}r"); + //~^ ERROR unicode escape in byte string + //~| ERROR unicode escape in byte string + println!("{:?}", br##"us\u{202B}e\u{202A}r"##); + + println!("{:?}", "/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "); + //~^ ERROR unicode codepoint changing visible direction of text present in literal + + println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); + //~^ ERROR unicode codepoint changing visible direction of text present in literal + println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "); + //~^ ERROR non-ASCII character in byte constant + //~| ERROR non-ASCII character in byte constant + //~| ERROR non-ASCII character in byte constant + //~| ERROR non-ASCII character in byte constant + println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##); + //~^ ERROR raw byte string must be ASCII + //~| ERROR raw byte string must be ASCII + //~| ERROR raw byte string must be ASCII + //~| ERROR raw byte string must be ASCII + println!("{:?}", '‮'); + //~^ ERROR unicode codepoint changing visible direction of text present in literal +} + +//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" +//~^ ERROR unicode codepoint changing visible direction of text present in comment + +/** '‮'); */fn foo() {} +//~^ ERROR unicode codepoint changing visible direction of text present in doc comment + +/** + * + * '‮'); */fn bar() {} +//~^^^ ERROR unicode codepoint changing visible direction of text present in doc comment diff --git a/src/test/ui/parser/unicode-control-codepoints.stderr b/src/test/ui/parser/unicode-control-codepoints.stderr new file mode 100644 index 0000000000000..71509fe41a84f --- /dev/null +++ b/src/test/ui/parser/unicode-control-codepoints.stderr @@ -0,0 +1,192 @@ +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:26 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: unicode escape in byte string + --> $DIR/unicode-control-codepoints.rs:6:35 + | +LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); + | ^^^^^^^^ unicode escape in byte string + | + = help: unicode escape sequences cannot be used as a byte or in a byte string + +error: non-ASCII character in byte constant + --> $DIR/unicode-control-codepoints.rs:16:26 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ byte constant must be ASCII but is '\u{202e}' + | +help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes + | +LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte constant + --> $DIR/unicode-control-codepoints.rs:16:30 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ byte constant must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte constant + --> $DIR/unicode-control-codepoints.rs:16:41 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ byte constant must be ASCII but is '\u{2069}' + | +help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only "); + | ~~~~~~~~~~~~ + +error: non-ASCII character in byte constant + --> $DIR/unicode-control-codepoints.rs:16:43 + | +LL | println!("{:?}", b"/* } if isAdmin begin admins only "); + | ^ byte constant must be ASCII but is '\u{2066}' + | +help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes + | +LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only "); + | ~~~~~~~~~~~~ + +error: raw byte string must be ASCII + --> $DIR/unicode-control-codepoints.rs:21:29 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{202e}' + +error: raw byte string must be ASCII + --> $DIR/unicode-control-codepoints.rs:21:33 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: raw byte string must be ASCII + --> $DIR/unicode-control-codepoints.rs:21:44 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2069}' + +error: raw byte string must be ASCII + --> $DIR/unicode-control-codepoints.rs:21:46 + | +LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##); + | ^ must be ASCII but is '\u{2066}' + +error: unicode codepoint changing visible direction of text present in comment + --> $DIR/unicode-control-codepoints.rs:2:5 + | +LL | // if access_level != "user" { // Check if admin + | ^^^^^^^^^^^^^^^^^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^ + | | || + | | |'\u{202a}' + | | '\u{202b}' + | this comment contains invisible unicode text flow control codepoints + | + = note: `#[deny(text_direction_codepoint_in_comment)]` on by default + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them + +error: unicode codepoint changing visible direction of text present in comment + --> $DIR/unicode-control-codepoints.rs:30:1 + | +LL | //"/* } if isAdmin begin admins only */" + | ^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^ + | | | | || + | | | | |'\u{2066}' + | | | | '\u{2069}' + | | | '\u{2066}' + | | '\u{202e}' + | this comment contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:11:22 + | +LL | println!("{:?}", "/* } if isAdmin begin admins only "); + | ^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^ + | | | | || + | | | | |'\u{2066}' + | | | | '\u{2069}' + | | | '\u{2066}' + | | '\u{202e}' + | this literal contains invisible unicode text flow control codepoints + | + = note: `#[deny(text_direction_codepoint_in_literal)]` on by default + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only "); + | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:14:22 + | +LL | println!("{:?}", r##"/* } if isAdmin begin admins only "##); + | ^^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^ + | | | | || + | | | | |'\u{2066}' + | | | | '\u{2069}' + | | | '\u{2066}' + | | '\u{202e}' + | this literal contains invisible unicode text flow control codepoints + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begin admins only "##); + | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:26:22 + | +LL | println!("{:?}", ''); + | ^- + | || + | |'\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | println!("{:?}", '\u{202e}'); + | ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in doc comment + --> $DIR/unicode-control-codepoints.rs:33:1 + | +LL | /** ''); */fn foo() {} + | ^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' + +error: unicode codepoint changing visible direction of text present in doc comment + --> $DIR/unicode-control-codepoints.rs:36:1 + | +LL | / /** +LL | | * +LL | | * ''); */fn bar() {} + | |___________^ this doc comment contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = note: if their presence wasn't intentional, you can remove them + = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' + +error: aborting due to 17 previous errors + From c6d24de9f8713dae22373189ac862f375251a5e1 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sun, 31 Oct 2021 13:11:36 +0100 Subject: [PATCH 2/4] add 1.56.1 to the release notes --- RELEASES.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/RELEASES.md b/RELEASES.md index 52d823d8acac4..59d4d4507ffce 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,3 +1,11 @@ +Version 1.56.1 (2021-11-01) +=========================== + +- New lints to detect the presence of bidirectional-override Unicode + codepoints in the compiled source code ([CVE-2021-42574]) + +[CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574 + Version 1.56.0 (2021-10-21) ======================== From 2e7743efe077a2c6503414bf73c6e90436d2eb30 Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Sun, 31 Oct 2021 13:12:08 +0100 Subject: [PATCH 3/4] change version number to 1.56.1 --- src/version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version b/src/version index 3ebf789f5a8df..43c989b55315f 100644 --- a/src/version +++ b/src/version @@ -1 +1 @@ -1.56.0 +1.56.1 From 6552f7a75a4ee0f314ca5e87edc4c322d3f3eceb Mon Sep 17 00:00:00 2001 From: Pietro Albini Date: Mon, 1 Nov 2021 08:12:01 +0100 Subject: [PATCH 4/4] ignore prim-methods-external-core rustdoc test on macOS The test is for an unstable feature that doesn't affect the stable release (no_core), and it's causing CI issues for macOS. --- src/test/rustdoc/intra-doc/prim-methods-external-core.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/rustdoc/intra-doc/prim-methods-external-core.rs b/src/test/rustdoc/intra-doc/prim-methods-external-core.rs index 9347d7bb42819..348345830330b 100644 --- a/src/test/rustdoc/intra-doc/prim-methods-external-core.rs +++ b/src/test/rustdoc/intra-doc/prim-methods-external-core.rs @@ -2,6 +2,7 @@ // build-aux-docs // ignore-cross-compile // ignore-windows +// ignore-macos #![deny(broken_intra_doc_links)] #![feature(no_core, lang_items)]