diff options
Diffstat (limited to 'poky/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch')
-rw-r--r-- | poky/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch | 1765 |
1 files changed, 0 insertions, 1765 deletions
diff --git a/poky/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch b/poky/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch deleted file mode 100644 index 9bad81d4d0..0000000000 --- a/poky/meta/recipes-devtools/gcc/gcc/0002-CVE-2021-42574.patch +++ /dev/null @@ -1,1765 +0,0 @@ -From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001 -From: Marek Polacek <polacek@redhat.com> -Date: Wed, 6 Oct 2021 14:33:59 -0400 -Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] - -From a link below: -"An issue was discovered in the Bidirectional Algorithm in the Unicode -Specification through 14.0. It permits the visual reordering of -characters via control sequences, which can be used to craft source code -that renders different logic than the logical ordering of tokens -ingested by compilers and interpreters. Adversaries can leverage this to -encode source code for compilers accepting Unicode such that targeted -vulnerabilities are introduced invisibly to human reviewers." - -More info: -https://nvd.nist.gov/vuln/detail/CVE-2021-42574 -https://trojansource.codes/ - -This is not a compiler bug. However, to mitigate the problem, this patch -implements -Wbidi-chars=[none|unpaired|any] to warn about possibly -misleading Unicode bidirectional control characters the preprocessor may -encounter. - -The default is =unpaired, which warns about improperly terminated -bidirectional control characters; e.g. a LRE without its corresponding PDF. -The level =any warns about any use of bidirectional control characters. - -This patch handles both UCNs and UTF-8 characters. UCNs designating -bidi characters in identifiers are accepted since r204886. Then r217144 -enabled -fextended-identifiers by default. Extended characters in C/C++ -identifiers have been accepted since r275979. However, this patch still -warns about mixing UTF-8 and UCN bidi characters; there seems to be no -good reason to allow mixing them. - -We warn in different contexts: comments (both C and C++-style), string -literals, character constants, and identifiers. Expectedly, UCNs are ignored -in comments and raw string literals. The bidirectional control characters -can nest so this patch handles that as well. - -I have not included nor tested this at all with Fortran (which also has -string literals and line comments). - -Dave M. posted patches improving diagnostic involving Unicode characters. -This patch does not make use of this new infrastructure yet. - - PR preprocessor/103026 - -gcc/c-family/ChangeLog: - - * c.opt (Wbidi-chars, Wbidi-chars=): New option. - -gcc/ChangeLog: - - * doc/invoke.texi: Document -Wbidi-chars. - -libcpp/ChangeLog: - - * include/cpplib.h (enum cpp_bidirectional_level): New. - (struct cpp_options): Add cpp_warn_bidirectional. - (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. - * internal.h (struct cpp_reader): Add warn_bidi_p member - function. - * init.c (cpp_create_reader): Set cpp_warn_bidirectional. - * lex.c (bidi): New namespace. - (get_bidi_utf8): New function. - (get_bidi_ucn): Likewise. - (maybe_warn_bidi_on_close): Likewise. - (maybe_warn_bidi_on_char): Likewise. - (_cpp_skip_block_comment): Implement warning about bidirectional - control characters. - (skip_line_comment): Likewise. - (forms_identifier_p): Likewise. - (lex_identifier): Likewise. - (lex_string): Likewise. - (lex_raw_string): Likewise. - -gcc/testsuite/ChangeLog: - - * c-c++-common/Wbidi-chars-1.c: New test. - * c-c++-common/Wbidi-chars-2.c: New test. - * c-c++-common/Wbidi-chars-3.c: New test. - * c-c++-common/Wbidi-chars-4.c: New test. - * c-c++-common/Wbidi-chars-5.c: New test. - * c-c++-common/Wbidi-chars-6.c: New test. - * c-c++-common/Wbidi-chars-7.c: New test. - * c-c++-common/Wbidi-chars-8.c: New test. - * c-c++-common/Wbidi-chars-9.c: New test. - * c-c++-common/Wbidi-chars-10.c: New test. - * c-c++-common/Wbidi-chars-11.c: New test. - * c-c++-common/Wbidi-chars-12.c: New test. - * c-c++-common/Wbidi-chars-13.c: New test. - * c-c++-common/Wbidi-chars-14.c: New test. - * c-c++-common/Wbidi-chars-15.c: New test. - * c-c++-common/Wbidi-chars-16.c: New test. - * c-c++-common/Wbidi-chars-17.c: New test. - -CVE: CVE-2021-42574 -Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628] -Signed-off-by: Pgowda <pgowda.cve@gmail.com> - ---- - gcc/c-family/c.opt | 24 ++ - gcc/doc/invoke.texi | 21 +- - gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 + - gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++ - gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 + - gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 + - gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 + - gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++ - gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++ - gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++ - gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++ - gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 + - gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 + - gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++ - gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++ - gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++ - gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 + - gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 + - gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++ - libcpp/include/cpplib.h | 18 +- - libcpp/init.c | 1 + - libcpp/internal.h | 7 + - libcpp/lex.c | 408 +++++++++++++++++++- - 23 files changed, 1315 insertions(+), 7 deletions(-) - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c - create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c - -diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt -index 8a4cd634f77..3976fc368db 100644 ---- a/gcc/c-family/c.opt -+++ b/gcc/c-family/c.opt -@@ -370,6 +370,30 @@ Wbad-function-cast - C ObjC Var(warn_bad_function_cast) Warning - Warn about casting functions to incompatible types. - -+Wbidi-chars -+C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) -+; -+ -+Wbidi-chars= -+C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) -+-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. -+ -+; Required for these enum values. -+SourceInclude -+cpplib.h -+ -+Enum -+Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) -+ -+EnumValue -+Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) -+ - Wbool-compare - C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) - Warn about boolean expression compared with an integer value different from true/false. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 6070288856c..a22758d18ee 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -326,7 +326,9 @@ Objective-C and Objective-C++ Dialects}. - -Warith-conversion @gol - -Warray-bounds -Warray-bounds=@var{n} @gol - -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol ---Wno-attribute-warning -Wbool-compare -Wbool-operation @gol -+-Wno-attribute-warning @gol -+-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol -+-Wbool-compare -Wbool-operation @gol - -Wno-builtin-declaration-mismatch @gol - -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol - -Wc11-c2x-compat @gol -@@ -7559,6 +7561,23 @@ Attributes considered include @code{allo - This is the default. You can disable these warnings with either - @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. - -+@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} -+@opindex Wbidi-chars= -+@opindex Wbidi-chars -+@opindex Wno-bidi-chars -+Warn about possibly misleading UTF-8 bidirectional control characters in -+comments, string literals, character constants, and identifiers. Such -+characters can change left-to-right writing direction into right-to-left -+(and vice versa), which can cause confusion between the logical order and -+visual order. This may be dangerous; for instance, it may seem that a piece -+of code is not commented out, whereas it in fact is. -+ -+There are three levels of warning supported by GCC@. The default is -+@option{-Wbidi-chars=unpaired}, which warns about improperly terminated -+bidi contexts. @option{-Wbidi-chars=none} turns the warning off. -+@option{-Wbidi-chars=any} warns about any use of bidirectional control -+characters. -+ - @item -Wbool-compare - @opindex Wno-bool-compare - @opindex Wbool-compare -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c -new file mode 100644 -index 00000000000..34f5ac19271 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c -@@ -0,0 +1,27 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* More nesting testing. */ -+ -+/* RLEâ« LRI⦠PDF⬠PDIâ©*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int LRE_\u202a_PDF_\u202c; -+int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; -+int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; -+int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; -+int FSI_\u2068; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int FSI_\u2068_PDI_\u2069; -+int FSI_\u2068_FSI_\u2068_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c -new file mode 100644 -index 00000000000..270ce2368a9 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c -@@ -0,0 +1,13 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test that we warn when mixing UCN and UTF-8. */ -+ -+int LRE_âª_PDF_\u202c; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -+int LRE_\u202a_PDF_â¬_; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -+const char *s1 = "LRE_âª_PDF_\u202c"; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -+const char *s2 = "LRE_\u202a_PDF_â¬"; -+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c -new file mode 100644 -index 00000000000..b07eec1da91 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c -@@ -0,0 +1,19 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile { target { c || c++11 } } } */ -+/* { dg-options "-Wbidi-chars=any" } */ -+/* Test raw strings. */ -+ -+const char *s1 = R"(a b c LRE⪠1 2 3 PDF⬠x y z)"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+const char *s2 = R"(a b c RLEâ« 1 2 3 PDF⬠x y z)"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+const char *s3 = R"(a b c LROâ 1 2 3 PDF⬠x y z)"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+const char *s4 = R"(a b c RLOâ® 1 2 3 PDF⬠x y z)"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+const char *s7 = R"(a b c FSI⨠1 2 3 PDIâ© x y) z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+const char *s8 = R"(a b c PDIâ© x y )z"; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+const char *s9 = R"(a b c PDF⬠x y z)"; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -diff -uprN '-x*.orig' '-x*.rej' del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c ---- del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800 -+++ gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-13 23:11:22.328439287 -0800 -@@ -0,0 +1,17 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile { target { c || c++11 } } } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test raw strings. */ -+ -+const char *s1 = R"(a b c LRE⪠1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s2 = R"(a b c RLEâ« 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s3 = R"(a b c LROâ 1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s4 = R"(a b c FSI⨠1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s5 = R"(a b c LRI⦠1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s6 = R"(a b c RLI⧠1 2 3)"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c -new file mode 100644 -index 00000000000..ba5f75d9553 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c -@@ -0,0 +1,38 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, -+ or RLOs. */ -+ -+/* LRI_â¦_LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â©*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â© -+// LRI_â¦_RLO_â®_RLE_â«_RLE_â«_PDI_â© -+// LRI_â¦_RLO_â®_RLE_â«_PDI_â© -+// FSI_â¨_RLO_â®_PDI_â© -+// FSI_â¨_FSI_â¨_RLO_â®_PDI_â© -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; -+int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int PDI_\u2069; -+int LRI_\u2066_PDI_\u2069; -+int RLI_\u2067_PDI_\u2069; -+int LRE_\u202a_LRI_\u2066_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; -+int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; -+int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLO_\u202e_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int RLI_\u2067_PDI_\u2069_RLI_\u2067; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int FSI_\u2068_PDF_\u202c_PDI_\u2069; -+int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c -new file mode 100644 -index 00000000000..a0ce8ff5e2c ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c -@@ -0,0 +1,59 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test unpaired bidi control chars in multiline comments. */ -+ -+/* -+ * LRE⪠end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * RLEâ« end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * LROâ end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * RLOâ® end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * LRI⦠end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * RLI⧠end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* -+ * FSI⨠end -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* LRE⪠-+ PDF⬠*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+/* FSI⨠-+ PDIâ© */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+ -+/* LRE<âª> -+ * -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ -+ -+/* -+ * LRE<âª> -+ */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+ -+/* -+ * -+ * LRE<âª> */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* RLI<â§> */ /* PDI<â©> */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRE<âª> */ /* PDF<â¬> */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c -new file mode 100644 -index 00000000000..baa0159861c ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c -@@ -0,0 +1,26 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=any" } */ -+/* Test LTR/RTL chars. */ -+ -+/* LTR<â> */ -+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ -+// LTR<â> -+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ -+/* RTL<â> */ -+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ -+// RTL<â> -+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ -+ -+const char *s1 = "LTR<â>"; -+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ -+const char *s2 = "LTR\u200e"; -+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ -+const char *s3 = "LTR\u200E"; -+/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ -+const char *s4 = "RTL<â>"; -+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ -+const char *s5 = "RTL\u200f"; -+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ -+const char *s6 = "RTL\u200F"; -+/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c -new file mode 100644 -index 00000000000..07cb4321f96 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c -@@ -0,0 +1,30 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test LTR/RTL chars. */ -+ -+/* LTR<â> */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// LTR<â> -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* RTL<â> */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// RTL<â> -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int ltr_\u200e; -+/* { dg-error "universal character " "" { target *-*-* } .-1 } */ -+int rtl_\u200f; -+/* { dg-error "universal character " "" { target *-*-* } .-1 } */ -+ -+const char *s1 = "LTR<â>"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+const char *s2 = "LTR\u200e"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+const char *s3 = "LTR\u200E"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+const char *s4 = "RTL<â>"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+const char *s5 = "RTL\u200f"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+const char *s6 = "RTL\u200F"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c -new file mode 100644 -index 00000000000..2340374f276 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c -@@ -0,0 +1,12 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+ -+int main() { -+ int isAdmin = 0; -+ /*â® } â¦if (isAdmin)⩠⦠begin admins only */ -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("You are an admin.\n"); -+ /* end admins only â® { â¦*/ -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c -new file mode 100644 -index 00000000000..2340374f276 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c -@@ -0,0 +1,9 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+ -+int main() { -+ /* Say hello; newlineâ§/*/ return 0 ; -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("Hello world.\n"); -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c -new file mode 100644 -index 00000000000..9dc7edb6e64 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c -@@ -0,0 +1,11 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+ -+int main() { -+ const char* access_level = "user"; -+ if (__builtin_strcmp(access_level, "userâ® â¦// Check if adminâ© â¦")) { -+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ -+ __builtin_printf("You are an admin.\n"); -+ } -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c -new file mode 100644 -index 00000000000..639e5c62e88 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c -@@ -0,0 +1,188 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ -+/* Test all bidi chars in various contexts (identifiers, comments, -+ string literals, character constants), both UCN and UTF-8. The bidi -+ chars here are properly terminated, except for the character constants. */ -+ -+/* a b c LRE⪠1 2 3 PDF⬠x y z */ -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+/* a b c RLEâ« 1 2 3 PDF⬠x y z */ -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+/* a b c LROâ 1 2 3 PDF⬠x y z */ -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+/* a b c RLOâ® 1 2 3 PDF⬠x y z */ -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+/* a b c LRI⦠1 2 3 PDIâ© x y z */ -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+/* a b c RLI⧠1 2 3 PDIâ© x y */ -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+/* a b c FSI⨠1 2 3 PDIâ© x y z */ -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ -+/* Same but C++ comments instead. */ -+// a b c LRE⪠1 2 3 PDF⬠x y z -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+// a b c RLEâ« 1 2 3 PDF⬠x y z -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+// a b c LROâ 1 2 3 PDF⬠x y z -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+// a b c RLOâ® 1 2 3 PDF⬠x y z -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+// a b c LRI⦠1 2 3 PDIâ© x y z -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+// a b c RLI⧠1 2 3 PDIâ© x y -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+// a b c FSI⨠1 2 3 PDIâ© x y z -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ -+/* Here we're closing an unopened context, warn when =any. */ -+/* a b c PDIâ© x y z */ -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+/* a b c PDF⬠x y z */ -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+// a b c PDIâ© x y z -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+// a b c PDF⬠x y z -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+/* Multiline comments. */ -+/* a b c PDIâ© x y z -+ */ -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ -+/* a b c PDF⬠x y z -+ */ -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDIâ© x y z -+ */ -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDF⬠x y z -+ */ -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDIâ© x y z */ -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+/* first -+ a b c PDF⬠x y z */ -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c RLEâ« 1 2 3 PDF⬠x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLOâ® 1 2 3 PDF⬠x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LRI⦠1 2 3 PDIâ© x y z"; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c RLI⧠1 2 3 PDIâ© x y z"; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c FSI⨠1 2 3 PDIâ© x y z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c PDIâ© x y z"; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c PDF⬠x y z"; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+} -+ -+void -+g2 () -+{ -+ const char c1 = '\u202a'; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char c2 = '\u202A'; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+ const char c3 = '\u202b'; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char c4 = '\u202B'; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+ const char c5 = '\u202d'; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char c6 = '\u202D'; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+ const char c7 = '\u202e'; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char c8 = '\u202E'; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+ const char c9 = '\u2066'; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+ const char c10 = '\u2067'; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+ const char c11 = '\u2068'; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+} -+ -+int aâªbâ¬c; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int aâ«bâ¬c; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+int aâbâ¬c; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+int aâ®bâ¬c; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+int aâ¦bâ©c; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+int aâ§bâ©c; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+int aâ¨bâ©c; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+int Aâ¬X; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+int A\u202cY; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+int A\u202CY2; -+/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ -+ -+int d\u202ae\u202cf; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int d\u202Ae\u202cf2; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int d\u202be\u202cf; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+int d\u202Be\u202cf2; -+/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ -+int d\u202de\u202cf; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+int d\u202De\u202cf2; -+/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ -+int d\u202ee\u202cf; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+int d\u202Ee\u202cf2; -+/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ -+int d\u2066e\u2069f; -+/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ -+int d\u2067e\u2069f; -+/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ -+int d\u2068e\u2069f; -+/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ -+int X\u2069; -+/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c -new file mode 100644 -index 00000000000..68cb053144b ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c -@@ -0,0 +1,188 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ -+/* Test all bidi chars in various contexts (identifiers, comments, -+ string literals, character constants), both UCN and UTF-8. The bidi -+ chars here are properly terminated, except for the character constants. */ -+ -+/* a b c LRE⪠1 2 3 PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLEâ« 1 2 3 PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LROâ 1 2 3 PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLOâ® 1 2 3 PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRI⦠1 2 3 PDIâ© x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLI⧠1 2 3 PDIâ© x y */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c FSI⨠1 2 3 PDIâ© x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* Same but C++ comments instead. */ -+// a b c LRE⪠1 2 3 PDF⬠x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLEâ« 1 2 3 PDF⬠x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LROâ 1 2 3 PDF⬠x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLOâ® 1 2 3 PDF⬠x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRI⦠1 2 3 PDIâ© x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLI⧠1 2 3 PDIâ© x y -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c FSI⨠1 2 3 PDIâ© x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* Here we're closing an unopened context, warn when =any. */ -+/* a b c PDIâ© x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c PDIâ© x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+// a b c PDF⬠x y z -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* Multiline comments. */ -+/* a b c PDIâ© x y z -+ */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ -+/* a b c PDF⬠x y z -+ */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDIâ© x y z -+ */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDF⬠x y z -+ */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ -+/* first -+ a b c PDIâ© x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+/* first -+ a b c PDF⬠x y z */ -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c RLEâ« 1 2 3 PDF⬠x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLOâ® 1 2 3 PDF⬠x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LRI⦠1 2 3 PDIâ© x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c RLI⧠1 2 3 PDIâ© x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c FSI⨠1 2 3 PDIâ© x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c PDIâ© x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c PDF⬠x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+ const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+void -+g2 () -+{ -+ const char c1 = '\u202a'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c2 = '\u202A'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c3 = '\u202b'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c4 = '\u202B'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c5 = '\u202d'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c6 = '\u202D'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c7 = '\u202e'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c8 = '\u202E'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c9 = '\u2066'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c10 = '\u2067'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char c11 = '\u2068'; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+int aâªbâ¬c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâ«bâ¬c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâbâ¬c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâ®bâ¬c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâ¦bâ©c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâ§bâ©c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int aâ¨bâ©c; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int Aâ¬X; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int A\u202cY; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int A\u202CY2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+ -+int d\u202ae\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Ae\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202be\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Be\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202de\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202De\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202ee\u202cf; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u202Ee\u202cf2; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2066e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2067e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int d\u2068e\u2069f; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -+int X\u2069; -+/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c -new file mode 100644 -index 00000000000..0ce6fff2dee ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c -@@ -0,0 +1,155 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test nesting of bidi chars in various contexts. */ -+ -+/* Terminated by the wrong char: */ -+/* a b c LRE⪠1 2 3 PDIâ© x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLEâ« 1 2 3 PDIâ© x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LROâ 1 2 3 PDIâ© x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLOâ® 1 2 3 PDIâ© x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c LRI⦠1 2 3 PDF⬠x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c RLI⧠1 2 3 PDF⬠x y z */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* a b c FSI⨠1 2 3 PDF⬠x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+/* LRE⪠PDF⬠*/ -+/* LRE⪠LRE⪠PDF⬠PDF⬠*/ -+/* PDF⬠LRE⪠PDF⬠*/ -+/* LRE⪠PDF⬠LRE⪠PDF⬠*/ -+/* LRE⪠LRE⪠PDF⬠*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* PDF⬠LRE⪠*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+// a b c LRE⪠1 2 3 PDIâ© x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLEâ« 1 2 3 PDIâ© x y z*/ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LROâ 1 2 3 PDIâ© x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLOâ® 1 2 3 PDIâ© x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c LRI⦠1 2 3 PDF⬠x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c RLI⧠1 2 3 PDF⬠x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// a b c FSI⨠1 2 3 PDF⬠x y z -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+// LRE⪠PDF⬠-+// LRE⪠LRE⪠PDF⬠PDF⬠-+// PDF⬠LRE⪠PDF⬠-+// LRE⪠PDF⬠LRE⪠PDF⬠-+// LRE⪠LRE⪠PDF⬠-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+// PDF⬠LRE⪠-+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+void -+g1 () -+{ -+ const char *s1 = "a b c LRE⪠1 2 3 PDIâ© x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s3 = "a b c RLEâ« 1 2 3 PDIâ© x y "; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s5 = "a b c LROâ 1 2 3 PDIâ© x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s7 = "a b c RLOâ® 1 2 3 PDIâ© x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s9 = "a b c LRI⦠1 2 3 PDF⬠x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s11 = "a b c RLI⧠1 2 3 PDF⬠x y z\ -+ "; -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+ const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s13 = "a b c FSI⨠1 2 3 PDF⬠x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s15 = "PDF⬠LREâª"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s16 = "PDF\u202c LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s17 = "LRE⪠PDFâ¬"; -+ const char *s18 = "LRE\u202a PDF\u202c"; -+ const char *s19 = "LRE⪠LRE⪠PDF⬠PDFâ¬"; -+ const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; -+ const char *s21 = "PDF⬠LRE⪠PDFâ¬"; -+ const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; -+ const char *s23 = "LRE⪠LRE⪠PDFâ¬"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s25 = "PDF⬠LREâª"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s26 = "PDF\u202c LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s27 = "PDF⬠LRE\u202a"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ const char *s28 = "PDF\u202c LREâª"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+} -+ -+int aLREâªbPDIâ©; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int A\u202aB\u2069C; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aRLEâ«bPDIâ©; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202bB\u2069c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aLROâbPDIâ©; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202db\u2069c2; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aRLOâ®bPDIâ©; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u202eb\u2069; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aLRIâ¦bPDFâ¬; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u2066b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aRLIâ§bPDFâ¬c -+; -+/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ -+int a\u2067b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aFSIâ¨bPDFâ¬; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a\u2068b\u202c; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aFSIâ¨bPD\u202C; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aFSI\u2068bPDFâ¬_; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int aLREâªbPDFâ¬b; -+int A\u202aB\u202c; -+int a_LREâª_LREâª_b_PDFâ¬_PDFâ¬; -+int A\u202aA\u202aB\u202cB\u202c; -+int aPDFâ¬bLREadPDFâ¬; -+int a_\u202C_\u202a_\u202c; -+int a_LREâª_b_PDFâ¬_c_LREâª_PDFâ¬; -+int a_\u202a_\u202c_\u202a_\u202c_; -+int a_LREâª_b_PDFâ¬_c_LREâª; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int a_\u202a_\u202c_\u202a_; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c -new file mode 100644 -index 00000000000..d012d420ec0 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c -@@ -0,0 +1,9 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=any" } */ -+/* Test we ignore UCNs in comments. */ -+ -+// a b c \u202a 1 2 3 -+// a b c \u202A 1 2 3 -+/* a b c \u202a 1 2 3 */ -+/* a b c \u202A 1 2 3 */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c -new file mode 100644 -index 00000000000..4f54c5092ec ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c -@@ -0,0 +1,13 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=any" } */ -+/* Test \u vs \U. */ -+ -+int a_\u202A; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\u202a_2; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\U0000202A_3; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -+int a_\U0000202a_4; -+/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ -diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c -new file mode 100644 -index 00000000000..e2af1b1ca97 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c -@@ -0,0 +1,29 @@ -+/* PR preprocessor/103026 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wbidi-chars=unpaired" } */ -+/* Test that we properly separate bidi contexts (comment/identifier/character -+ constant/string literal). */ -+ -+/* LRE ->âª<- */ int pdf_\u202c_1; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLE ->â«<- */ int pdf_\u202c_2; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRO ->â<- */ int pdf_\u202c_3; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLO ->â®<- */ int pdf_\u202c_4; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRI ->â¦<-*/ int pdi_\u2069_1; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* RLI ->â§<- */ int pdi_\u2069_12; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* FSI ->â¨<- */ int pdi_\u2069_3; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+ -+const char *s1 = "LRE\u202a"; /* PDF ->â¬<- */ -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+/* LRE ->âª<- */ const char *s2 = "PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+const char *s3 = "LRE\u202a"; int pdf_\u202c_5; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -+int lre_\u202a; const char *s4 = "PDF\u202c"; -+/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ -diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h -index 176f8c5bbce..112b9c24751 100644 ---- a/libcpp/include/cpplib.h -+++ b/libcpp/include/cpplib.h -@@ -318,6 +318,17 @@ enum cpp_main_search - CMS_system, /* Search the system INCLUDE path. */ - }; - -+/* The possible bidirectional control characters checking levels, from least -+ restrictive to most. */ -+enum cpp_bidirectional_level { -+ /* No checking. */ -+ bidirectional_none, -+ /* Only detect unpaired uses of bidirectional control characters. */ -+ bidirectional_unpaired, -+ /* Detect any use of bidirectional control characters. */ -+ bidirectional_any -+}; -+ - /* This structure is nested inside struct cpp_reader, and - carries all the options visible to the command line. */ - struct cpp_options -@@ -531,6 +542,10 @@ struct cpp_options - /* True if warn about differences between C++98 and C++11. */ - bool cpp_warn_cxx11_compat; - -+ /* Nonzero if bidirectional control characters checking is on. See enum -+ cpp_bidirectional_level. */ -+ unsigned char cpp_warn_bidirectional; -+ - /* Dependency generation. */ - struct - { -@@ -635,7 +650,8 @@ enum cpp_warning_reason { - CPP_W_C90_C99_COMPAT, - CPP_W_C11_C2X_COMPAT, - CPP_W_CXX11_COMPAT, -- CPP_W_EXPANSION_TO_DEFINED -+ CPP_W_EXPANSION_TO_DEFINED, -+ CPP_W_BIDIRECTIONAL - }; - - /* Callback for header lookup for HEADER, which is the name of a -diff --git a/libcpp/init.c b/libcpp/init.c -index 5a424e23553..f9a8f5f088f 100644 ---- a/libcpp/init.c -+++ b/libcpp/init.c -@@ -219,6 +219,7 @@ cpp_create_reader (enum c_lang lang, cpp - = ENABLE_CANONICAL_SYSTEM_HEADERS; - CPP_OPTION (pfile, ext_numeric_literals) = 1; - CPP_OPTION (pfile, warn_date_time) = 0; -+ CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; - - /* Default CPP arithmetic to something sensible for the host for the - benefit of dumb users like fix-header. */ -diff --git a/libcpp/internal.h b/libcpp/internal.h -index 8577cab6c83..0ce0246c5a2 100644 ---- a/libcpp/internal.h -+++ b/libcpp/internal.h -@@ -597,6 +597,13 @@ struct cpp_reader - /* Location identifying the main source file -- intended to be line - zero of said file. */ - location_t main_loc; -+ -+ /* Returns true iff we should warn about UTF-8 bidirectional control -+ characters. */ -+ bool warn_bidi_p () const -+ { -+ return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; -+ } - }; - - /* Character classes. Based on the more primitive macros in safe-ctype.h. -diff --git a/libcpp/lex.c b/libcpp/lex.c -index fa2253d41c3..6a4fbce6030 100644 ---- a/libcpp/lex.c -+++ b/libcpp/lex.c -@@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi - } - } - -+namespace bidi { -+ enum class kind { -+ NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL -+ }; -+ -+ /* All the UTF-8 encodings of bidi characters start with E2. */ -+ constexpr uchar utf8_start = 0xe2; -+ -+ /* A vector holding currently open bidi contexts. We use a char for -+ each context, its LSB is 1 if it represents a PDF context, 0 if it -+ represents a PDI context. The next bit is 1 if this context was open -+ by a bidi character written as a UCN, and 0 when it was UTF-8. */ -+ semi_embedded_vec <unsigned char, 16> vec; -+ -+ /* Close the whole comment/identifier/string literal/character constant -+ context. */ -+ void on_close () -+ { -+ vec.truncate (0); -+ } -+ -+ /* Pop the last element in the vector. */ -+ void pop () -+ { -+ unsigned int len = vec.count (); -+ gcc_checking_assert (len > 0); -+ vec.truncate (len - 1); -+ } -+ -+ /* Return the context of the Ith element. */ -+ kind ctx_at (unsigned int i) -+ { -+ return (vec[i] & 1) ? kind::PDF : kind::PDI; -+ } -+ -+ /* Return which context is currently opened. */ -+ kind current_ctx () -+ { -+ unsigned int len = vec.count (); -+ if (len == 0) -+ return kind::NONE; -+ return ctx_at (len - 1); -+ } -+ -+ /* Return true if the current context comes from a UCN origin, that is, -+ the bidi char which started this bidi context was written as a UCN. */ -+ bool current_ctx_ucn_p () -+ { -+ unsigned int len = vec.count (); -+ gcc_checking_assert (len > 0); -+ return (vec[len - 1] >> 1) & 1; -+ } -+ -+ /* We've read a bidi char, update the current vector as necessary. */ -+ void on_char (kind k, bool ucn_p) -+ { -+ switch (k) -+ { -+ case kind::LRE: -+ case kind::RLE: -+ case kind::LRO: -+ case kind::RLO: -+ vec.push (ucn_p ? 3u : 1u); -+ break; -+ case kind::LRI: -+ case kind::RLI: -+ case kind::FSI: -+ vec.push (ucn_p ? 2u : 0u); -+ break; -+ /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO -+ whose scope has not yet been terminated. */ -+ case kind::PDF: -+ if (current_ctx () == kind::PDF) -+ pop (); -+ break; -+ /* PDI terminates the scope of the last LRI, RLI, or FSI whose -+ scope has not yet been terminated, as well as the scopes of -+ any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not -+ yet been terminated. */ -+ case kind::PDI: -+ for (int i = vec.count () - 1; i >= 0; --i) -+ if (ctx_at (i) == kind::PDI) -+ { -+ vec.truncate (i); -+ break; -+ } -+ break; -+ case kind::LTR: -+ case kind::RTL: -+ /* These aren't popped by a PDF/PDI. */ -+ break; -+ [[likely]] case kind::NONE: -+ break; -+ default: -+ abort (); -+ } -+ } -+ -+ /* Return a descriptive string for K. */ -+ const char *to_str (kind k) -+ { -+ switch (k) -+ { -+ case kind::LRE: -+ return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; -+ case kind::RLE: -+ return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; -+ case kind::LRO: -+ return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; -+ case kind::RLO: -+ return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; -+ case kind::LRI: -+ return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; -+ case kind::RLI: -+ return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; -+ case kind::FSI: -+ return "U+2068 (FIRST STRONG ISOLATE)"; -+ case kind::PDF: -+ return "U+202C (POP DIRECTIONAL FORMATTING)"; -+ case kind::PDI: -+ return "U+2069 (POP DIRECTIONAL ISOLATE)"; -+ case kind::LTR: -+ return "U+200E (LEFT-TO-RIGHT MARK)"; -+ case kind::RTL: -+ return "U+200F (RIGHT-TO-LEFT MARK)"; -+ default: -+ abort (); -+ } -+ } -+} -+ -+/* Parse a sequence of 3 bytes starting with P and return its bidi code. */ -+ -+static bidi::kind -+get_bidi_utf8 (const unsigned char *const p) -+{ -+ gcc_checking_assert (p[0] == bidi::utf8_start); -+ -+ if (p[1] == 0x80) -+ switch (p[2]) -+ { -+ case 0xaa: -+ return bidi::kind::LRE; -+ case 0xab: -+ return bidi::kind::RLE; -+ case 0xac: -+ return bidi::kind::PDF; -+ case 0xad: -+ return bidi::kind::LRO; -+ case 0xae: -+ return bidi::kind::RLO; -+ case 0x8e: -+ return bidi::kind::LTR; -+ case 0x8f: -+ return bidi::kind::RTL; -+ default: -+ break; -+ } -+ else if (p[1] == 0x81) -+ switch (p[2]) -+ { -+ case 0xa6: -+ return bidi::kind::LRI; -+ case 0xa7: -+ return bidi::kind::RLI; -+ case 0xa8: -+ return bidi::kind::FSI; -+ case 0xa9: -+ return bidi::kind::PDI; -+ default: -+ break; -+ } -+ -+ return bidi::kind::NONE; -+} -+ -+/* Parse a UCN where P points just past \u or \U and return its bidi code. */ -+ -+static bidi::kind -+get_bidi_ucn (const unsigned char *p, bool is_U) -+{ -+ /* 6.4.3 Universal Character Names -+ \u hex-quad -+ \U hex-quad hex-quad -+ where \unnnn means \U0000nnnn. */ -+ -+ if (is_U) -+ { -+ if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') -+ return bidi::kind::NONE; -+ /* Skip 4B so we can treat \u and \U the same below. */ -+ p += 4; -+ } -+ -+ /* All code points we are looking for start with 20xx. */ -+ if (p[0] != '2' || p[1] != '0') -+ return bidi::kind::NONE; -+ else if (p[2] == '2') -+ switch (p[3]) -+ { -+ case 'a': -+ case 'A': -+ return bidi::kind::LRE; -+ case 'b': -+ case 'B': -+ return bidi::kind::RLE; -+ case 'c': -+ case 'C': -+ return bidi::kind::PDF; -+ case 'd': -+ case 'D': -+ return bidi::kind::LRO; -+ case 'e': -+ case 'E': -+ return bidi::kind::RLO; -+ default: -+ break; -+ } -+ else if (p[2] == '6') -+ switch (p[3]) -+ { -+ case '6': -+ return bidi::kind::LRI; -+ case '7': -+ return bidi::kind::RLI; -+ case '8': -+ return bidi::kind::FSI; -+ case '9': -+ return bidi::kind::PDI; -+ default: -+ break; -+ } -+ else if (p[2] == '0') -+ switch (p[3]) -+ { -+ case 'e': -+ case 'E': -+ return bidi::kind::LTR; -+ case 'f': -+ case 'F': -+ return bidi::kind::RTL; -+ default: -+ break; -+ } -+ -+ return bidi::kind::NONE; -+} -+ -+/* We're closing a bidi context, that is, we've encountered a newline, -+ are closing a C-style comment, or are at the end of a string literal, -+ character constant, or identifier. Warn if this context was not -+ properly terminated by a PDI or PDF. P points to the last character -+ in this context. */ -+ -+static void -+maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) -+{ -+ if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired -+ && bidi::vec.count () > 0) -+ { -+ const location_t loc -+ = linemap_position_for_column (pfile->line_table, -+ CPP_BUF_COLUMN (pfile->buffer, p)); -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "unpaired UTF-8 bidirectional control character " -+ "detected"); -+ } -+ /* We're done with this context. */ -+ bidi::on_close (); -+} -+ -+/* We're at the beginning or in the middle of an identifier/comment/string -+ literal/character constant. Warn if we've encountered a bidi character. -+ KIND says which bidi character it was; P points to it in the character -+ stream. UCN_P is true iff this bidi character was written as a UCN. */ -+ -+static void -+maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, -+ bool ucn_p) -+{ -+ if (__builtin_expect (kind == bidi::kind::NONE, 1)) -+ return; -+ -+ const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); -+ -+ if (warn_bidi != bidirectional_none) -+ { -+ const location_t loc -+ = linemap_position_for_column (pfile->line_table, -+ CPP_BUF_COLUMN (pfile->buffer, p)); -+ /* It seems excessive to warn about a PDI/PDF that is closing -+ an opened context because we've already warned about the -+ opening character. Except warn when we have a UCN x UTF-8 -+ mismatch. */ -+ if (kind == bidi::current_ctx ()) -+ { -+ if (warn_bidi == bidirectional_unpaired -+ && bidi::current_ctx_ucn_p () != ucn_p) -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "UTF-8 vs UCN mismatch when closing " -+ "a context by \"%s\"", bidi::to_str (kind)); -+ } -+ else if (warn_bidi == bidirectional_any) -+ { -+ if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "\"%s\" is closing an unopened context", -+ bidi::to_str (kind)); -+ else -+ cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, -+ "found problematic Unicode character \"%s\"", -+ bidi::to_str (kind)); -+ } -+ } -+ /* We're done with this context. */ -+ bidi::on_char (kind, ucn_p); -+} -+ - /* Skip a C-style block comment. We find the end of the comment by - seeing if an asterisk is before every '/' we encounter. Returns - nonzero if comment terminated by EOF, zero otherwise. -@@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi - cpp_buffer *buffer = pfile->buffer; - const uchar *cur = buffer->cur; - uchar c; -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - - cur++; - if (*cur == '/') -@@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi - if (c == '/') - { - if (cur[-2] == '*') -- break; -+ { -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur); -+ break; -+ } - - /* Warn about potential nested comments, but not if the '/' - comes immediately before the true comment delimiter. -@@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi - { - unsigned int cols; - buffer->cur = cur - 1; -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur); - _cpp_process_line_notes (pfile, true); - if (buffer->next_line >= buffer->rlimit) - return true; -@@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi - - cur = buffer->cur; - } -+ /* If this is a beginning of a UTF-8 encoding, it might be -+ a bidirectional control character. */ -+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_utf8 (cur - 1); -+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); -+ } - } - - buffer->cur = cur; -@@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) - { - cpp_buffer *buffer = pfile->buffer; - location_t orig_line = pfile->line_table->highest_line; -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - -- while (*buffer->cur != '\n') -- buffer->cur++; -+ if (!warn_bidi_p) -+ while (*buffer->cur != '\n') -+ buffer->cur++; -+ else -+ { -+ while (*buffer->cur != '\n' -+ && *buffer->cur != bidi::utf8_start) -+ buffer->cur++; -+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) -+ { -+ while (*buffer->cur != '\n') -+ { -+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) -+ { -+ bidi::kind kind = get_bidi_utf8 (buffer->cur); -+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, -+ /*ucn_p=*/false); -+ } -+ buffer->cur++; -+ } -+ maybe_warn_bidi_on_close (pfile, buffer->cur); -+ } -+ } - - _cpp_process_line_notes (pfile, true); - return orig_line != pfile->line_table->highest_line; -@@ -1317,11 +1671,13 @@ static const cppchar_t utf8_signifier = - - /* Returns TRUE if the sequence starting at buffer->cur is valid in - an identifier. FIRST is TRUE if this starts an identifier. */ -+ - static bool - forms_identifier_p (cpp_reader *pfile, int first, - struct normalize_state *state) - { - cpp_buffer *buffer = pfile->buffer; -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - - if (*buffer->cur == '$') - { -@@ -1344,6 +1700,13 @@ forms_identifier_p (cpp_reader *pfile, i - cppchar_t s; - if (*buffer->cur >= utf8_signifier) - { -+ if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) -+ && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_utf8 (buffer->cur); -+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, -+ /*ucn_p=*/false); -+ } - if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, - state, &s)) - return true; -@@ -1352,6 +1715,13 @@ forms_identifier_p (cpp_reader *pfile, i - && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) - { - buffer->cur += 2; -+ if (warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_ucn (buffer->cur, -+ buffer->cur[-1] == 'U'); -+ maybe_warn_bidi_on_char (pfile, buffer->cur, kind, -+ /*ucn_p=*/true); -+ } - if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, - state, &s, NULL, NULL)) - return true; -@@ -1460,6 +1830,7 @@ lex_identifier (cpp_reader *pfile, const - const uchar *cur; - unsigned int len; - unsigned int hash = HT_HASHSTEP (0, *base); -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - - cur = pfile->buffer->cur; - if (! starts_ucn) -@@ -1483,6 +1854,8 @@ lex_identifier (cpp_reader *pfile, const - pfile->buffer->cur++; - } - } while (forms_identifier_p (pfile, false, nst)); -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); - result = _cpp_interpret_identifier (pfile, base, - pfile->buffer->cur - base); - *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); -@@ -1719,6 +2092,7 @@ static void - lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) - { - const uchar *pos = base; -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - - /* 'tis a pity this information isn't passed down from the lexer's - initial categorization of the token. */ -@@ -1955,8 +2329,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t - pos = base = pfile->buffer->cur; - note = &pfile->buffer->notes[pfile->buffer->cur_note]; - } -+ else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) -+ && warn_bidi_p) -+ maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), -+ /*ucn_p=*/false); - } - -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, pos); -+ - if (CPP_OPTION (pfile, user_literals)) - { - /* If a string format macro, say from inttypes.h, is placed touching -@@ -2051,15 +2432,27 @@ lex_string (cpp_reader *pfile, cpp_token - else - terminator = '>', type = CPP_HEADER_NAME; - -+ const bool warn_bidi_p = pfile->warn_bidi_p (); - for (;;) - { - cppchar_t c = *cur++; - - /* In #include-style directives, terminators are not escapable. */ - if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') -- cur++; -+ { -+ if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); -+ maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); -+ } -+ cur++; -+ } - else if (c == terminator) -- break; -+ { -+ if (warn_bidi_p) -+ maybe_warn_bidi_on_close (pfile, cur - 1); -+ break; -+ } - else if (c == '\n') - { - cur--; -@@ -2076,6 +2469,11 @@ lex_string (cpp_reader *pfile, cpp_token - } - else if (c == '\0') - saw_NUL = true; -+ else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) -+ { -+ bidi::kind kind = get_bidi_utf8 (cur - 1); -+ maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); -+ } - } - - if (saw_NUL && !pfile->state.skipping) |