Hello,

This is an alternative proposal to the "asm inline" feature.

Kernel developers have reported suboptimal optimization where use of asm
statements such as

  asm("ud2\n"
      ".pushsection foo\n"
      ...
      ".popsection\n" : : ...)

impacts inlining decisions badly, since GCC assumes cost of the asm to be
high, even though it emits just one instruction to the text section. I'd
like to point out that branch range optimization is also negatively affected.

I suggest we give asm writers a way to mark portions of the asm template
that should be ignored in cost estimation. This is a more fine-grained
mechanism compared to 'asm inline', and it also helps branch range optimization.

Specifically, I propose that in Extended asms, percent-backtick (%`) is
recognized as such region boundary. Percent sign is of course always special
in Extended asms, and backtick sign is not claimed by any backend.

For Basic asms, no similar mechanism is necessary since they are antithetical
to efficiency in the first place.

Kernels developers can then use this extension via

[if gcc-9 or compatible]
#define ASM_NONTEXT_BEGIN "%`\n"
[else]
#define ASM_NONTEXT_BEGIN "\n"
[endif]

#define ASM_NONTEXT_END ASM_NONTEXT_BEGIN

  asm("ud2\n"
      ASM_NONTEXT_BEGIN
      ".pushsection foo\n"
      ...
      ".popsection\n"
      ASM_NONTEXT_END : : ...)

How does this look?

        * doc/extend.texi (Extended Asm): Document %` in template.
        (Size of an Asm): Document intended use of %`.
        * final.c (asm_insn_count): Adjust.
        (asm_str_count): Add argument to distinguish basic and extended asms.
        In extended asms, ignore separators inside of %` ... %`.
        (output_asm_insn): Handle %`.
        * rtl.h (asm_str_count): Adjust prototype.
        * tree-inline.c (estimate_num_insns): Adjust.
        * config/arm/arm.c (arm_rtx_costs_internal): Adjust.
        
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cfe6a8e5bb8..798d310061c 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -8613,6 +8613,11 @@ generates multiple assembler instructions.
 Outputs @samp{@{}, @samp{|}, and @samp{@}} characters (respectively)
 into the assembler code.  When unescaped, these characters have special
 meaning to indicate multiple assembler dialects, as described below.
+
+@item %`
+Signifies a boundary of a region where instruction separators are not
+counted towards its cost (@pxref{Size of an asm}). Must be followed by
+a whitespace character.
 @end table
 
 @subsubheading Multiple assembler dialects in @code{asm} templates
@@ -9821,7 +9826,7 @@ does this by counting the number of instructions in the 
pattern of the
 @code{asm} and multiplying that by the length of the longest
 instruction supported by that processor.  (When working out the number
 of instructions, it assumes that any occurrence of a newline or of
-whatever statement separator character is supported by the assembler --
+whatever statement separator character is supported by the assembler ---
 typically @samp{;} --- indicates the end of an instruction.)
 
 Normally, GCC's estimate is adequate to ensure that correct
@@ -9832,6 +9837,15 @@ space in the object file than is needed for a single 
instruction.
 If this happens then the assembler may produce a diagnostic saying that
 a label is unreachable.
 
+Likewise, it is possible for GCC to significantly over-estimate the
+number of instructions in an @code{asm}, resulting in suboptimal decisions
+when the estimate is used during inlining and branch range optimization.
+This can happen if the @code{asm} template has many lines that do not
+correspond to instructions, for example when the @samp{.pushsection}
+directive is used to emit auxiliary data in a non-code section.
+For Extended @code{asm} statements, you can improve the estimate by
+wrapping the non-code portion in @samp{%` ... %`} delimiters.
+
 @node Alternate Keywords
 @section Alternate Keywords
 @cindex alternate keywords
diff --git a/gcc/final.c b/gcc/final.c
index 6943c073d9b..dc474d7f6e1 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1408,29 +1408,40 @@ static int
 asm_insn_count (rtx body)
 {
   const char *templ;
+  bool basic = GET_CODE (body) == ASM_INPUT;
 
-  if (GET_CODE (body) == ASM_INPUT)
+  if (basic)
     templ = XSTR (body, 0);
   else
     templ = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
 
-  return asm_str_count (templ);
+  return asm_str_count (templ, basic);
 }
 
 /* Return the number of machine instructions likely to be generated for the
-   inline-asm template. */
+   inline-asm template.  BASIC indicates if it is used in a basic asm.  */
 int
-asm_str_count (const char *templ)
+asm_str_count (const char *templ, bool basic)
 {
   int count = 1;
+  bool in_backticks = false;
 
   if (!*templ)
     return 0;
 
   for (; *templ; templ++)
-    if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
-       || *templ == '\n')
-      count++;
+    if (*templ == '%' && !basic)
+      {
+       templ++;
+       if (!*templ)
+         break;
+       /* Separators inside %` ... %` are not counted.  */
+       if (*templ == '`')
+         in_backticks = !in_backticks;
+      }
+    else if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
+            || *templ == '\n')
+      count += !in_backticks;
 
   return count;
 }
@@ -3836,6 +3847,7 @@ output_asm_insn (const char *templ, rtx *operands)
   int oporder[MAX_RECOG_OPERANDS];
   char opoutput[MAX_RECOG_OPERANDS];
   int ops = 0;
+  bool in_backticks = false;
 
   /* An insn may return a null string template
      in a case where no assembler code is needed.  */
@@ -3902,6 +3914,17 @@ output_asm_insn (const char *templ, rtx *operands)
            p++;
            fprintf (asm_out_file, "%d", insn_counter);
          }
+       /* %` guards parts of the template that should not participate in
+          estimating its cost, e.g. where it switches to a non-text section.
+          It does not result in any output.  */
+       else if (*p == '`')
+         {
+           p++;
+           in_backticks = !in_backticks;
+           /* Leave room for future extensions.  */
+           if (*p && !ISSPACE (*p))
+             output_operand_lossage ("%%` must be followed by whitespace");
+         }
        /* % followed by a letter and some digits
           outputs an operand in a special way depending on the letter.
           Letters `acln' are implemented directly.
@@ -3995,6 +4018,9 @@ output_asm_insn (const char *templ, rtx *operands)
     output_asm_name ();
 
   putc ('\n', asm_out_file);
+
+  if (in_backticks)
+    output_operand_lossage ("missing closing %%`");
 }
 
 /* Output a LABEL_REF, or a bare CODE_LABEL, as an assembler symbol.  */
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 68d3ceab29f..d29229d2817 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -4288,7 +4288,7 @@ extern void simplify_using_condition (rtx, rtx *, bitmap);
 /* In final.c  */
 extern unsigned int compute_alignments (void);
 extern void update_alignments (vec<rtx> &);
-extern int asm_str_count (const char *templ);
+extern int asm_str_count (const char *, bool);
 
 struct rtl_hooks
 {
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 9352acc8af6..890df5f86de 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4103,7 +4103,9 @@ estimate_num_insns (gimple *stmt, eni_weights *weights)
 
     case GIMPLE_ASM:
       {
-       int count = asm_str_count (gimple_asm_string (as_a <gasm *> (stmt)));
+       gasm *asm_stmt = as_a <gasm *> (stmt);
+       const char *templ = gimple_asm_string (asm_stmt);
+       int count = asm_str_count (templ, gimple_asm_input_p (asm_stmt));
        /* 1000 means infinity. This avoids overflows later
           with very long asm statements.  */
        if (count > 1000)
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6332e68df05..f48129ae076 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11026,7 +11026,8 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum 
rtx_code outer_code,
       /* Just a guess.  Guess number of instructions in the asm
          plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
          though (see PR60663).  */
-        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
+        const char *templ = ASM_OPERANDS_TEMPLATE (x);
+        int asm_length = MAX (1, asm_str_count (templ, false));
         int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
 
         *cost = COSTS_N_INSNS (asm_length + num_operands);

Reply via email to