Bidirectional characters can cause security vulnerabilities, as
explained in the paper mentioned in a comment in this patch.
Therefore, we warn if such characters appear in string_literals,
character_literals, or comments.
Tested on x86_64-pc-linux-gnu, committed on trunk
gcc/ada/
* scng.adb (Check_Bidi): New procedure to give warning. Note
that this is called only for non-ASCII characters, so should not
be an efficiency issue.
(Slit): Call Check_Bidi for wide characters in string_literals.
(Minus_Case): Call Check_Bidi for wide characters in comments.
(Char_Literal_Case): Call Check_Bidi for wide characters in
character_literals. Move Accumulate_Checksum down, because
otherwise, if Err is True, the Code is uninitialized.
* errout.ads: Make the obsolete nature of "Insertion character
?" more prominent; one should not have to read several
paragraphs before finding out that it's obsolete.
diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -275,7 +275,7 @@ package Errout is
-- contain subprograms to be inlined in the main program. It is also
-- used by the Compiler_Unit_Warning pragma for similar reasons.
- -- Insertion character ? (Question: warning message)
+ -- Insertion character ? (Question: warning message -- OBSOLETE)
-- The character ? appearing anywhere in a message makes the message
-- warning instead of a normal error message, and the text of the
-- message will be preceded by "warning:" in the normal case. The
@@ -302,7 +302,7 @@ package Errout is
-- clear that the continuation is part of a warning message, but it is
-- not necessary to go through any computational effort to include it.
--
- -- Note: this usage is obsolete, use ?? ?*? ?$? ?x? ?.x? ?_x? to
+ -- Note: this usage is obsolete; use ?? ?*? ?$? ?x? ?.x? ?_x? to
-- specify the string to be added when Warn_Doc_Switch is set to True.
-- If this switch is True, then for simple ? messages it has no effect.
-- This simple form is to ease transition and may be removed later
diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb
--- a/gcc/ada/scng.adb
+++ b/gcc/ada/scng.adb
@@ -322,6 +322,49 @@ package body Scng is
-- Returns True if the scan pointer is pointing to the start of a wide
-- character sequence, does not modify the scan pointer in any case.
+ procedure Check_Bidi (Code : Char_Code);
+ -- Give a warning if Code is a bidirectional character, which can cause
+ -- security vulnerabilities. See the following article:
+ --
+ -- @article{boucher_trojansource_2021,
+ -- title = {Trojan {Source}: {Invisible} {Vulnerabilities}},
+ -- author = {Nicholas Boucher and Ross Anderson},
+ -- year = {2021},
+ -- journal = {Preprint},
+ -- eprint = {2111.00169},
+ -- archivePrefix = {arXiv},
+ -- primaryClass = {cs.CR},
+ -- url = {https://arxiv.org/abs/2111.00169}
+ -- }
+
+ ----------------
+ -- Check_Bidi --
+ ----------------
+
+ type Bidi_Characters is
+ (LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI);
+ Bidi_Character_Codes : constant array (Bidi_Characters) of Char_Code :=
+ (LRE => 16#202A#,
+ RLE => 16#202B#,
+ LRO => 16#202D#,
+ RLO => 16#202E#,
+ LRI => 16#2066#,
+ RLI => 16#2067#,
+ FSI => 16#2068#,
+ PDF => 16#202C#,
+ PDI => 16#2069#);
+ -- Above are the bidirectional characters, along with their Unicode code
+ -- points.
+
+ procedure Check_Bidi (Code : Char_Code) is
+ begin
+ for Bidi_Code of Bidi_Character_Codes loop
+ if Code = Bidi_Code then
+ Error_Msg ("??bidirectional wide character", Wptr);
+ end if;
+ end loop;
+ end Check_Bidi;
+
-----------------------
-- Double_Char_Token --
-----------------------
@@ -1070,6 +1113,8 @@ package body Scng is
if Err then
Error_Illegal_Wide_Character;
Code := Get_Char_Code (' ');
+ else
+ Check_Bidi (Code);
end if;
Accumulate_Checksum (Code);
@@ -1611,11 +1656,11 @@ package body Scng is
elsif Start_Of_Wide_Character then
declare
- Wptr : constant Source_Ptr := Scan_Ptr;
Code : Char_Code;
Err : Boolean;
begin
+ Wptr := Scan_Ptr;
Scan_Wide (Source, Scan_Ptr, Code, Err);
-- If not well formed wide character, then just skip
@@ -1629,6 +1674,8 @@ package body Scng is
elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
Scan_Ptr := Wptr;
exit;
+ else
+ Check_Bidi (Code);
end if;
end;
@@ -1736,7 +1783,6 @@ package body Scng is
if Start_Of_Wide_Character then
Wptr := Scan_Ptr;
Scan_Wide (Source, Scan_Ptr, Code, Err);
- Accumulate_Checksum (Code);
if Err then
Error_Illegal_Wide_Character;
@@ -1752,8 +1798,12 @@ package body Scng is
Error_Msg -- CODEFIX
("(Ada 2005) non-graphic character not permitted " &
"in character literal", Wptr);
+ else
+ Check_Bidi (Code);
end if;
+ Accumulate_Checksum (Code);
+
if Source (Scan_Ptr) /= ''' then
Error_Msg_S ("missing apostrophe");
else