> -----Original Message----- > From: Richard Sandiford <richard.sandif...@arm.com> > Sent: Friday, April 21, 2023 6:19 PM > To: Tamar Christina <tamar.christ...@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <n...@arm.com>; Richard Earnshaw > <richard.earns...@arm.com> > Subject: Re: [PATCH] RFC: New compact syntax for insn and insn_split in > Machine Descriptions > > Tamar Christina <tamar.christ...@arm.com> writes: > > Hi All, > > > > This patch adds support for a compact syntax for specifying > > constraints in instruction patterns. Credit for the idea goes to Richard > Earnshaw. > > > > I am sending up this RFC to get feedback for it's inclusion in GCC 14. > > With this new syntax we want a clean break from the current > > limitations to make something that is hopefully easier to use and maintain. > > > > The idea behind this compact syntax is that often times it's quite > > hard to correlate the entries in the constrains list, attributes and > > instruction > lists. > > > > One has to count and this often is tedious. Additionally when > > changing a single line in the insn multiple lines in a diff change, > > making it harder to see what's going on. > > > > This new syntax takes into account many of the common things that are > done in MD > > files. It's also worth saying that this version is intended to deal with > > the > > common case of a string based alternatives. For C chunks we have some > ideas > > but those are not intended to be addressed here. > > > > It's easiest to explain with an example: > > > > normal syntax: > > > > (define_insn_and_split "*movsi_aarch64" > > [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, > r, r, r, w,r,w, w") > > (match_operand:SI 1 "aarch64_mov_operand" " > r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))] > > "(register_operand (operands[0], SImode) > > || aarch64_reg_or_zero (operands[1], SImode))" > > "@ > > mov\\t%w0, %w1 > > mov\\t%w0, %w1 > > mov\\t%w0, %w1 > > mov\\t%w0, %1 > > # > > * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", > operands[1]); > > ldr\\t%w0, %1 > > ldr\\t%s0, %1 > > str\\t%w1, %0 > > str\\t%s1, %0 > > adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1] > > adr\\t%x0, %c1 > > adrp\\t%x0, %A1 > > fmov\\t%s0, %w1 > > fmov\\t%w0, %s1 > > fmov\\t%s0, %s1 > > * return aarch64_output_scalar_simd_mov_immediate (operands[1], > SImode);" > > "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL > (operands[1]), SImode) > > && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" > > [(const_int 0)] > > "{ > > aarch64_expand_mov_immediate (operands[0], operands[1]); > > DONE; > > }" > > ;; The "mov_imm" type for CNT is just a placeholder. > > [(set_attr "type" > "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, > > > load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") > > (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd") > > (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4") > > ] > > ) > > > > New syntax: > > > > (define_insn_and_split "*movsi_aarch64" > > [(set (match_operand:SI 0 "nonimmediate_operand") > > (match_operand:SI 1 "aarch64_mov_operand"))] > > "(register_operand (operands[0], SImode) > > || aarch64_reg_or_zero (operands[1], SImode))" > > "@@ (cons: 0 1; attrs: type arch length) > > [=r, r ; mov_reg , * , 4] mov\t%w0, %w1 > > [k , r ; mov_reg , * , 4] ^ > > [r , k ; mov_reg , * , 4] ^ > > [r , M ; mov_imm , * , 4] mov\t%w0, %1 > > [r , n ; mov_imm , * , *] # > > [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', > '%x0', operands[1]); > > [r , m ; load_4 , * , 4] ldr\t%w0, %1 > > [w , m ; load_4 , fp , 4] ldr\t%s0, %1 > > [m , rZ ; store_4 , * , 4] str\t%w1, %0 > > [m , w ; store_4 , fp , 4] str\t%s1, %0 > > [r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1] > > [r , Usa; adr , * , 4] adr\t%x0, %c1 > > [r , Ush; adr , * , 4] adrp\t%x0, %A1 > > [w , rZ ; f_mcr , fp , 4] fmov\t%s0, %w1 > > [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1 > > [w , w ; fmov , fp , 4] fmov\t%s0, %s1 > > [w , Ds ; neon_move, simd, 4] << > aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);" > > "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL > (operands[1]), SImode) > > && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" > > [(const_int 0)] > > { > > aarch64_expand_mov_immediate (operands[0], operands[1]); > > DONE; > > } > > ;; The "mov_imm" type for CNT is just a placeholder. > > ) > > > > The patch contains some more rewritten examples for both Arm and > > AArch64. I have included them for examples in this RFC but the final > > version posted in GCC 14 will have these split out. > > > > The main syntax rules are as follows (See docs for full rules): > > - Template must start with "@@" to use the new syntax. > > - "@@" is followed by a layout in parentheses which is "cons:" followed by > > a list of match_operand/match_scratch IDs, then a semicolon, then the > > same for attributes ("attrs:"). Both sections are optional (so you can > > use only cons, or only attrs, or both), and cons must come before attrs > > if present. > > - Each alternative begins with any amount of whitespace. > > - Following the whitespace is a comma-separated list of constraints and/or > > attributes within brackets [], with sections separated by a semicolon. > > - Following the closing ']' is any amount of whitespace, and then the > > actual > > asm output. > > - Spaces are allowed in the list (they will simply be removed). > > - All alternatives should be specified: a blank list should be > > "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if > > you leave certain attributes empty, I have found). > > - The actual constraint string in the match_operand or match_scratch, and > > the attribute string in the set_attr, must be blank or an empty string > > (you can't combine the old and new syntaxes). > > - The common idion * return can be shortened by using <<. > > - Any unexpanded iterators left during processing will result in an error > > at > > compile time. If for some reason <> is needed in the output then these > > must be escaped using \. > > - Inside a @@ block '' is treated as "" when there are multiple characters > > inside the single quotes. This version does not handle multi byte > > literals > > like specifying characters as their numerical encoding, like \003 nor > > does > > it handle unicode, especially multibyte encodings. This feature may be > more > > trouble than it's worth so have no finished it off, however this means > > one > > can use 'foo' instead of \"foo\" to denote a multicharacter string. > > - Inside an @@ block any unexpanded iterators will result in a compile > > time > > fault instead of incorrect assembly being generated at runtime. If the > > literal <> is needed in the output this needs to be escaped with \<\>. > > - This check is not performed inside C blocks (lines starting with *). > > - Instead of copying the previous instruction again in the next pattern, > > one > > can use ^ to refer to the previous asm string. > > Thanks for doing this. The new syntax seems like a clear improvement for > complex patterns like movs. > > Some comments/suggestions: > > - From a style perspective, out-of-order constraints should IMO be strongly > discouraged. The asm string uses %0, %1, %2 etc. to refer to operands, > and having that directly after a list that puts the constraints in > a different order (such as [%2, %0, %1]) would IMO be very confusing. > > I agree there might be cases where dropping constraints makes sense. > But I think in general we should encourage all constraints to be > specified, and be specified in order. And that's likely to be the > natural choice in an overwhelming majority of cases anyway. > > So how about having a simpler syntax for the first line when all > constraints are specified in order? Maybe just "cons" (without the > colon or numbers). > > - I'm not too keen on the '' thing. It sounded from internal > discussion like backslashes and quoting were a problem generally. > > Would it work to quote the new form in {@ ... } instead? There should > be no compatibility problem with that, since @ isn't a standard C++ > lexing token.
Fair enough, did you mean {@<string>} or @'string' ? Just so I understand before implementing 😊 > > - Could we support a comment syntax? E.g. ignore lines beginning with > ;; or // (or both)? In the example above, it would be good to keep > the comment about the CNT type attribute nearer to the attribute itself. Fair point. > > - Very minor, but using [...] rather than (...) for the first line > might make it more visually obvious that it's acting as a table > header for the [...] rows. Thanks, Will send new version out soon. Thanks, Tamar > > Haven't done a detailed review of the gensupport bits, but: > > > [...] > > @@ -700,12 +702,37 @@ process_template (class data *d, const char > *template_code) > > if (sp != ep) > > message_at (d->loc, "trailing whitespace in output template"); > > > > - while (cp < sp) > > + /* Check for any unexpanded iterators. */ > > + std::string buff (cp, sp - cp); > > + if (bp[0] != '*' && d->compact_syntax_p) > > { > > - putchar (*cp); > > - cp++; > > + size_t start = buff.find ('<'); > > + size_t end = buff.find ('>', start + 1); > > + if (end != std::string::npos || start != std::string::npos) > > + { > > + if (end == std::string::npos || start == std::string::npos) > > + fatal_at (d->loc, "unmatched angle brackets, likely an " > > + "error in iterator syntax in %s", buff.c_str ()); > > + > > + if (start != 0 > > + && buff[start-1] == '\\' > > + && buff[end-1] == '\\') > > + { > > + /* Found a valid escape sequence, erase the characters for > > + output. */ > > + buff.erase (end-1, 1); > > + buff.erase (start-1, 1); > > + } > > + else > > + fatal_at (d->loc, "unresolved iterator '%s' in '%s'", > > + buff.substr(start+1, end - start-1).c_str (), > > + buff.c_str ()); > > + } > > } > > Asm strings that want unbalanced but quoted < or > should be able to use > them, so the check for backslashes should probably come first. > I suppose this also runs into the classic problem of whether the preceding > backslash was itself quoted, etc. > > So maybe it would make sense to walk through character-by-character, > something like: > > const char *p = cp; > const char *last_bracket = nullptr; > while (p < sp) > { > if (*p == '\\' && p + 1 < sp) > { > p += 2; > continue; > } > if (*p == '>' && last_bracket && *last_bracket == '<') > ... unexpanded iterator ... > else if (*p == '<' || *p == '>') > last_bracket = p; > p += 1; > } > if (last_bracket) > ... error ... > > That also copes with unlikely things like \<...\>...<foo>, where an unexpanded > iterator (or incorrectly quoted <...>) comes after a correctly-quoted <...>. > > Thanks, > Richard