Looks good! Just some minor comments: Tamar Christina <tamar.christ...@arm.com> writes: > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index > 6a435eb44610960513e9739ac9ac1e8a27182c10..1437ab55b260ab5c876e92d59ba39d24bffc6276 > 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -27,6 +27,7 @@ See the next chapter for information on the C header file. > from such an insn. > * Output Statement:: For more generality, write C code to output > the assembler code. > +* Compact Syntax:: Compact syntax for writing Machine descriptors.
s/Machine/machine/ > * Predicates:: Controlling what kinds of operands can be used > for an insn. > * Constraints:: Fine-tuning operand selection. > @@ -713,6 +714,213 @@ you can use @samp{*} inside of a @samp{@@} > multi-alternative template: > @end group > @end smallexample > > +@node Compact Syntax > +@section Compact Syntax > +@cindex compact syntax > + > +In cases where the number of alternatives in a @code{define_insn} or > +@code{define_insn_and_split} are large then it may be beneficial to use the > +compact syntax when specifying alternatives. > + > +This syntax puts the constraints and attributes on the same horizontal line > as > +the instruction assembly template. > + > +As an example > + > +@smallexample > +@group > +(define_insn_and_split "" > + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r") > + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv"))] > + "" > + "@@ > + mov\\t%w0, %w1 > + mov\\t%w0, %w1 > + mov\\t%w0, %w1 > + mov\\t%w0, %1 > + # > + * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);" > + "&& true" > + [(const_int 0)] > + @{ > + aarch64_expand_mov_immediate (operands[0], operands[1]); > + DONE; > + @} > + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm") > + (set_attr "arch" "*,*,*,*,*,sve") > + (set_attr "length" "4,4,4,4,*, 4") > +] > +) > +@end group > +@end smallexample > + > +can be better expressed as: > + > +@smallexample > +@group > +(define_insn_and_split "" > + [(set (match_operand:SI 0 "nonimmediate_operand") > + (match_operand:SI 1 "aarch64_mov_operand"))] > + "" > + @{@@ [cons: =0, 1; attrs: type, arch, length] > + [r , r ; mov_reg , * , 4] mov\t%w0, %w1 > + [k , r ; mov_reg , * , 4] ^ > + [r , k ; mov_reg , * , 4] ^ > + [r , M ; mov_imm , * , 4] mov\t%w0, %1 > + [r , n ; mov_imm , * , *] # > + [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate > ("cnt", "%x0", operands[1]); > + @} > + "&& true" > + [(const_int 0)] > + @{ > + aarch64_expand_mov_immediate (operands[0], operands[1]); > + DONE; > + @} > +) > +@end group > +@end smallexample > + > +The syntax rules are as follows: > +@itemize @bullet > +@item > +Template must start with "@{@@" to use the new syntax. s/Template/Templates/ or s/Template/The template/ @{@@ should be quoted using @samp{...} rather than "...". Same for later instances. > + > +@item > +"@{@@" is followed by a layout in parentheses which is @samp{"cons:"} > followed by > +a list of @code{match_operand}/@code{match_scratch} comma operand numbers, > then a > +semicolon, followed by the same for attributes (@samp{"attrs:"}). Operand No "..." needed for cons: and attrs: (@samp is enough) > +modifiers can be placed in this section group as well. Both sections > +are optional (so you can use only @samp{cons}, or only @samp{attrs}, or > both), > +and @samp{cons} must come before @samp{attrs} if present. > + > +@item > +Each alternative begins with any amount of whitespace. > + > +@item > +Following the whitespace is a comma-separated list of @samp{constraints} > and/or > +@samp{attributes} within brackets @code{[]}, with sections separated by a I think "constraints" and "attributes" should be unquoted here, rather than @samp. > +semicolon. > + > +@item > +Should you want to copy the previous asm line, the symbol @code{^} can be > used. > +This allows less copy pasting between alternative and reduces the number of > +lines to update on changes. > + > +@item > +When using C functions for output, the idiom @code{* return <function>;} can > be I think this should be @samp rather than @code, since it's quoting a sample rather than a single entity (but I don't know texinfo well, so could be wrong). s/<function>/@var{function}/ > +replaced with the shorthand @code{<< <function>;}. > + > +@item > +Following the closing ']' is any amount of whitespace, and then the actual > asm @samp or @code here too > +output. > + > +@item > +Spaces are allowed in the list (they will simply be removed). > + > +@item > +All alternatives should be specified: a blank list should be "[,,]", "[,,;,]" > +etc., not "[]" or "". @samp for these too. I don't think @samp{} prints anything, so "" probably needs to be described in words. Maybe s/All alternatives/All constraint alternatives/? Attribute alternatives should generally use "*" rather than be blank. > + > +@item > +Within an @{@@ block both multiline and singleline C comments are allowed, > but @samp quoting here too > +when used outside of a C block they must be the only non-whitespace blocks on > +the line. > + > +@item > +Any unexpanded iterators within the block will result in a compile time error > +rather than accepting the generating the @code{<..>} in the output asm. If > the Typo: "than accepting the generating the". > +literal @code{<..>} is required it should be escaped as @code{\<..\>}. > + > +@item > +Within an @{@@ block, any iterators that do not get expanded will result in > an > +error. If for some reason it is required to have @code{<>} in the output > then > +these must be escaped using @backslashchar{}. It sounds like these last two bullet points are saying the same thing. If they do say the same thing, the second one seems clearer to me FWIW. > +@item > +The actual constraint string in the @code{match_operand} or > +@code{match_scratch}, and the attribute string in the @code{set_attr}, must > be > +blank or an empty string (you can't combine the old and new syntaxes). > + > +@item > +@code{set_attr} are optional. If a @code{set_attr} is defined in the > +@samp{attrs} section then that declaration can be both definition and > +declaration. If both @samp{attrs} and @code{set_attr} are defined for the > same > +entry then the attribute string must be empty or blank. Took me a couple of reads to get this. How about: @code{set_attr}s are optional if the template includes an @code{attrs} section. If there is a @code{set_attr} for an attribute that is mentioned in the @code{attrs} section, the @code{set_attr} @var{value-string} must be empty or blank. (Although see below for a comment about this kind of placeholder.) > +@item > +Additional @code{set_attr} can be specified other than the ones in the > +@samp{attrs} list. These must use the @samp{normal} syntax and must be > defined Should be no @samp{} around "normal". > +after all @samp{attrs} specified. How about "must come last"? > + > +In other words, the following are valid: > +@smallexample > +@group > +(define_insn_and_split "" > + [(set (match_operand:SI 0 "nonimmediate_operand") > + (match_operand:SI 1 "aarch64_mov_operand"))] > + "" > + @{@@ [cons: 0, 1; attrs: type, arch, length]@} > + ... @dots{} for ellipsis. Same for the other examples. > + [(set_attr "type")] > + [(set_attr "arch")] > + [(set_attr "length")] > + [(set_attr "foo" "mov_imm")] Too many [ and ], should be: [(set_attr "type") (set_attr "arch") (set_attr "length") (set_attr "foo" "mov_imm")] Same for the other examples. > diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc > index > 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..8ac62dc37edf4c095d694e5c7caa4499cf201334 > 100644 > --- a/gcc/genoutput.cc > +++ b/gcc/genoutput.cc > @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see > #include "errors.h" > #include "read-md.h" > #include "gensupport.h" > +#include <string> This include isn't needed any more. > > /* No instruction can have more operands than this. Sorry for this > arbitrary limit, but what machine will have an instruction with > @@ -157,6 +158,7 @@ public: > int n_alternatives; /* Number of alternatives in each > constraint */ > int operand_number; /* Operand index in the big array. */ > int output_format; /* INSN_OUTPUT_FORMAT_*. */ > + bool compact_syntax_p; > struct operand_data operand[MAX_MAX_OPERANDS]; > }; > > @@ -700,12 +702,57 @@ process_template (class data *d, const char > *template_code) > if (sp != ep) > message_at (d->loc, "trailing whitespace in output template"); > > - while (cp < sp) > + /* Check for any unexpanded iterators. */ > + if (bp[0] != '*' && d->compact_syntax_p) I assume the bp[0] != '*' condition skips the check for C code blocks. Genuine question, but are you sure we want that? C code often includes asm strings (in quotes), such as for the SVE CNT[BHWD] example. Extending the check would mean that any use of <...> for C++ templates will need to be quoted, but explicit instantiation is pretty rare in .md files. It would also look weird for conditions. Either way is fine, just asking. > { > - putchar (*cp); > - cp++; > + const char *p = cp; > + const char *last_bracket = nullptr; > + while (p < sp) > + { > + if (*p == '\\' && p + 1 < sp) > + { > + putchar (*p); > + putchar (*(p+1)); > + p += 2; > + continue; > + } > + > + if (*p == '>' && last_bracket && *last_bracket == '<') > + { > + size_t len = p - last_bracket; > + char *iter = XNEWVEC (char, len); > + memcpy (iter, last_bracket+1, (size_t)(len - 1)); > + char *nl = strchr (const_cast<char*> (cp), '\n'); > + if (nl) > + *nl ='\0'; > + iter[len - 1] = '\0'; > + fatal_at (d->loc, "unresolved iterator '%s' in '%s'", > + iter, cp); I think we could just use: fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'", len - 1, last_bracket + 1, cp); and avoid the copy. > + } > + else if (*p == '<' || *p == '>') > + last_bracket = p; > + > + putchar (*p); > + p += 1; > + } > + > + if (last_bracket) > + { > + char *nl = strchr (const_cast<char*> (cp), '\n'); > + if (nl) > + *nl ='\0'; > + fatal_at (d->loc, "unmatched angle brackets, likely an " > + "error in iterator syntax in %s", cp); > + } > + } > + else > + { > + while (cp < sp) > + putchar (*(cp++)); > } > > + cp = sp; > + > if (!found_star) > puts ("\","); > else if (*bp != '*') > @@ -881,6 +928,8 @@ gen_insn (md_rtx_info *info) > else > d->name = 0; > > + d->compact_syntax_p = compact_syntax.contains (insn); > + > /* Build up the list in the same order as the insns are seen > in the machine description. */ > d->next = 0; > diff --git a/gcc/gensupport.h b/gcc/gensupport.h > index > a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 > 100644 > --- a/gcc/gensupport.h > +++ b/gcc/gensupport.h > @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see > #ifndef GCC_GENSUPPORT_H > #define GCC_GENSUPPORT_H > > +#include "hash-set.h" > #include "read-md.h" > > struct obstack; > @@ -218,6 +219,8 @@ struct pattern_stats > int num_operand_vars; > }; > > +extern hash_set<rtx> compact_syntax; > + > extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec); > extern void compute_test_codes (rtx, file_location, char *); > extern file_location get_file_location (rtx); > diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc > index > f9efc6eb7572a44b8bb154b0b22be3815bd0d244..f1d6b512356844da5d1dadbc69e08c16ef7a3abd > 100644 > --- a/gcc/gensupport.cc > +++ b/gcc/gensupport.cc > @@ -27,12 +27,17 @@ > #include "read-md.h" > #include "gensupport.h" > #include "vec.h" > +#include <string> > +#include <vector> > +#include <ctype.h> > > #define MAX_OPERANDS 40 > > static rtx operand_data[MAX_OPERANDS]; > static rtx match_operand_entries_in_pattern[MAX_OPERANDS]; > static char used_operands_numbers[MAX_OPERANDS]; > +/* List of entries which are part of the new syntax. */ > +hash_set<rtx> compact_syntax; Might as well make this static too, like the other vars. > > > /* In case some macros used by files we include need it, define this here. > */ > @@ -545,6 +550,569 @@ gen_rewrite_sequence (rtvec vec) > return new_vec; > } > > +/* The following is for handling the compact syntax for constraints and > + attributes. > + > + The normal syntax looks like this: > + > + ... > + (match_operand: 0 "s_register_operand" "r,I,k") > + (match_operand: 2 "s_register_operand" "r,k,I") > + ... > + "@ > + <asm> > + <asm> > + <asm>" > + ... > + (set_attr "length" "4,8,8") > + > + The compact syntax looks like this: > + > + ... > + (match_operand: 0 "s_register_operand") > + (match_operand: 2 "s_register_operand") > + ... > + {@ [cons: 0, 2; attrs: length] > + [r,r; 4] <asm> > + [I,k; 8] <asm> > + [k,I; 8] <asm> > + } > + ... > + (set_attr "length") > + > + This is the only place where this syntax needs to be handled. Relevant > + patterns are transformed from compact to the normal syntax before they are > + queued, so none of the gen* programs need to know about this syntax at > all. > + > + Conversion process (convert_syntax): > + > + 0) Check that pattern actually uses new syntax (check for {@ ... }). > + > + 1) Get the "layout", i.e. the "(cons: 0 2; attrs: length)" from the above Now "[cons: 0, 2; attrs: length]" > + example. cons must come first; both are optional. Set up two vecs, > + convec and attrvec, for holding the results of the transformation. > + > + 2) For each alternative: parse the list of constraints and/or attributes, > + and enqueue them in the relevant lists in convec and attrvec. By the > end > + of this process, convec[N].con and attrvec[N].con should contain > regular > + syntax constraint/attribute lists like "r,I,k". Copy the asm to a > string > + as we go. > + > + 3) Search the rtx and write the constraint and attribute lists into the > + correct places. Write the asm back into the template. */ > + > +/* Helper class for shuffling constraints/attributes in convert_syntax and > + add_constraints/add_attributes. This includes commas but not whitespace. > */ > + > +class conlist { > +private: > + std::string con; > + > +public: > + std::string name; > + std::string modifier; > + int idx = -1; > + > + conlist () > + { > + } Now that we're C++11, it's probably more canonical to use: conlist () = default; > + > + /* [ns..ns + len) should be a string with the id of the rtx to match > + i.e. if rtx is the relevant match_operand or match_scratch then > + [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then > + [ns..ns + len) should equal XSTR (rtx, 0). */ > + conlist (const char *ns, unsigned int len, bool numeric) > + { > + /* Trim leading whitespaces. */ > + while (*ns == ' ' || *ns == '\t') I think ISSPACE (*ns) is preferred here > + { > + ns++; > + len--; > + } > + > + /* Trim trailing whitespace. */ > + for (int i = len - 1; i >= 0; i++, len--) > + if (ns[len] != ' ' && ns[len] != '\t') Similarly !ISSPACE (ns[len]) here. > + break; > + > + /* Parse off any modifiers. */ > + while (!isalnum (*ns)) Should be ISALNUM (to make it independent of the locale) > + { > + modifier += *(ns++); > + len--; > + } > + > + /* What remains is the name. */ > + name.assign (ns, len); > + if (numeric) > + idx = std::stoi(name); Formatting: space before "(". > + } > + > + /* Adds a character to the end of the string. */ > + void add (char c) > + { > + con += c; > + } > + > + /* Output the string in the form of a brand-new char *, then effectively > + clear the internal string by resetting len to 0. */ > + char * out () Formatting: no need for a space before "out". > + { > + /* Final character is always a trailing comma, so strip it out. */ trailing ',', ';' or ']', rather than just a comma? > + char * q; Similarly no space before "q" here. > + if (modifier.empty ()) > + q = xstrndup (con.c_str (), con.size () - 1); Could just be "xstrdup (con.c_str ())". > + else > + { > + int len = con.size () + modifier.size (); > + q = XNEWVEC (char, len); > + strncpy (q, modifier.c_str (), modifier.size ()); > + strncpy (q + modifier.size (), con.c_str (), con.size ()); > + q[len -1] = '\0'; > + } Do we need the separation between "modifier" and "cons"? It looks like the code completes the initialisation of "modifier" before it writes to "cons", and so we could just use a single string. > + > + con.clear (); > + modifier.clear (); > + return q; > + } > +}; > + > +typedef std::vector<conlist> vec_conlist; > + > +/* Add constraints to an rtx. The match_operand/match_scratch that are > matched > + must be in depth-first order i.e. read from top to bottom in the pattern. This is no longer true (thanks). Because of that... > + index is the index of the conlist we are up to so far. ...I don't think we need the index parameter or the return value. Instead, maybe we should reset vec_conlist::idx to -1... > + This function is similar to remove_constraints. > + Errors if adding the constraints would overwrite existing constraints. > + Returns 1 + index of last conlist to be matched. */ > + > +static unsigned int > +add_constraints (rtx part, file_location loc, unsigned int index, > + vec_conlist &cons) > +{ > + const char *format_ptr; > + > + if (part == NULL_RTX || index == cons.size ()) > + return index; > + > + /* If match_op or match_scr, check if we have the right one, and if so, > copy > + over the constraint list. */ > + if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH) > + { > + int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1; > + int id = XINT (part, 0); > + > + if (XSTR (part, field)[0] != '\0') > + { > + error_at (loc, "can't mix normal and compact constraint syntax"); > + return cons.size (); > + } > + XSTR (part, field) = cons[id].out (); ...here, to indicate that the constraint has been consumed. It would be good to check that id < cons.size (). > + > + ++index; > + } > + > + format_ptr = GET_RTX_FORMAT (GET_CODE (part)); > + > + /* Recursively search the rtx. */ > + for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++) > + switch (*format_ptr++) > + { > + case 'e': > + case 'u': > + index = add_constraints (XEXP (part, i), loc, index, cons); > + break; > + case 'E': > + if (XVEC (part, i) != NULL) > + for (int j = 0; j < XVECLEN (part, i); j++) > + index = add_constraints (XVECEXP (part, i, j), loc, index, cons); > + break; > + default: > + continue; > + } > + > + return index; > +} > + > +/* Add attributes to an rtx. The attributes that are matched must be in order > + i.e. read from top to bottom in the pattern. > + Errors if adding the attributes would overwrite existing attributes. > + Returns 1 + index of last conlist to be matched. */ > + > +static unsigned int > +add_attributes (rtx x, file_location loc, vec_conlist &attrs) > +{ > + unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3; > + unsigned int index = 0; > + > + if (XVEC (x, attr_index) == NULL) > + return index; > + > + for (int i = 0; i < XVECLEN (x, attr_index); ++i) > + { > + rtx part = XVECEXP (x, attr_index, i); > + > + if (GET_CODE (part) != SET_ATTR) > + continue; > + > + if (attrs[index].name.compare (XSTR (part, 0)) == 0) Just "attrs[index].name == XSTR (part, 0)" should work. > + { > + if (XSTR (part, 1) && XSTR (part, 1)[0] != '\0') > + { > + error_at (loc, "can't mix normal and compact attribute syntax"); > + break; > + } > + XSTR (part, 1) = attrs[index].out (); > + > + ++index; > + if (index == attrs.size ()) > + break; > + } It looks like you forgive mixing new-style and old-style syntax, since there's no "else error" here. But the documentation said that that wasn't allowed. Either way seems OK to me, but see the next comment. > + } > + > + return index; > +} > + > +/* Modify the attributes list to make space for the implicitly declared > + attributes in the attrs: list. */ > + > +static void > +create_missing_attributes (rtx x, file_location /* loc */, vec_conlist > &attrs) > +{ > + if (attrs.empty ()) > + return; > + > + unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3; > + vec_conlist missing; > + > + /* This is an O(n*m) loop but it's fine, both n and m will always be very > + small. */ Agreed that quadraticness isn't a problem. But I wonder how many people would write an explicit placeholder set_attr. Unlike match_operand and match_scratch, a placeholder set_attr doesn't carry any additional information. It might be simpler to drop add_attributes and add all attributes unconditionally in this function instead. If the user tries to specify the same attribute using both syntaxes, the pattern would end up with two definitions of the same attribute, which ought to be flagged by existing code. > + for (conlist cl : attrs) > + { > + bool found = false; > + for (int i = 0; XVEC (x, attr_index) && i < XVECLEN (x, attr_index); > ++i) > + { > + rtx part = XVECEXP (x, attr_index, i); > + > + if (GET_CODE (part) != SET_ATTR > + || cl.name.compare (XSTR (part, 0)) == 0) > + { > + found = true; > + break; > + } > + } > + > + if (!found) > + missing.push_back (cl); > + } > + > + rtvec orig = XVEC (x, attr_index); > + size_t n_curr = orig ? XVECLEN (x, attr_index) : 0; > + rtvec copy = rtvec_alloc (n_curr + missing.size ()); > + > + /* Create a shallow copy of existing entries. */ > + memcpy (©->elem[missing.size ()], &orig->elem[0], sizeof (rtx) * > n_curr); > + XVEC (x, attr_index) = copy; > + > + /* Create the new elements. */ > + for (unsigned i = 0; i < missing.size (); i++) > + { > + rtx attr = rtx_alloc (SET_ATTR); > + XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ()); > + XSTR (attr, 1) = NULL; > + XVECEXP (x, attr_index, i) = attr; > + } > + > + return; > +} > + > +/* Consumes spaces and tabs. */ > + > +static inline void > +skip_spaces (const char **str) > +{ > + while (**str == ' ' || **str == '\t') ISSPACE here too. > + (*str)++; > +} > + > +/* Consumes the given character, if it's there. */ > + > +static inline bool > +expect_char (const char **str, char c) > +{ > + if (**str != c) > + return false; > + (*str)++; > + return true; > +} > + > +/* Parses the section layout that follows a "{@}" if using new syntax. Builds > + a vector for a single section. E.g. if we have "attrs: length arch)..." > + then list will have two elements, the first for "length" and the second > + for "arch". */ > + > +static void > +parse_section_layout (const char **templ, const char *label, > + vec_conlist &list, bool numeric) > +{ > + const char *name_start; > + size_t label_len = strlen (label); > + if (strncmp (label, *templ, label_len) == 0) > + { > + *templ += label_len; > + > + /* Gather the names. */ > + while (**templ != ';' && **templ != ']') > + { > + skip_spaces (templ); > + name_start = *templ; > + int len = 0; > + char val = (*templ)[len]; > + while (val != ',' && val != ';' && val != ']') > + val = (*templ)[++len]; > + *templ += len; > + if (val == ',') > + (*templ)++; > + list.push_back (conlist (name_start, len, numeric)); > + } > + } > +} > + > +/* Parse a section, a section is defined as a named space separated list, > e.g. > + > + foo: a b c Now comma-separated rather than space-separated. Applies to the example too. > + > + is a section named "foo" with entries a,b and c. */ > + > +static void > +parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no, > + vec_conlist &list, file_location loc, const char *name) > +{ > + unsigned int i; > + > + /* Go through the list, one character at a time, adding said character > + to the correct string. */ > + for (i = 0; **templ != ']' && **templ != ';'; (*templ)++) > + { > + if (**templ != ' ' && **templ != '\t') !ISSPACE > + { > + list[i].add(**templ); Formatting: should be a space before "(". > + if (**templ == ',') > + { > + ++i; > + if (i == n_elems) > + fatal_at (loc, "too many %ss in alternative %d: expected %d", > + name, alt_no, n_elems); > + } > + } > + } > + > + if (i + 1 < n_elems) > + fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d", > + name, alt_no, n_elems, i); > + > + list[i].add(','); > +} > + > +/* The compact syntax has more convience syntaxes. As such we post process > + the lines to get them back to something the normal syntax understands. */ > + > +static void > +preprocess_compact_syntax (file_location loc, int alt_no, std::string &line, > + std::string &last_line) > +{ > + /* Check if we're copying the last statement. */ > + if (line.find ("^") == 0 && line.size () == 1) > + { > + if (last_line.empty ()) > + fatal_at (loc, "found instruction to copy previous line (^) in" > + "alternative %d but no previous line to copy", alt_no); > + line = last_line; > + return; > + } > + > + std::string result; > + std::string buffer; > + /* Check if we have << which means return c statement. */ > + if (line.find ("<<") == 0) > + { > + result.append ("* return "); > + result.append (line.substr (3)); Seems like this should be line.substr (2) or that the find() should include a space after "<<". As it stands, we'd accept <<X and drop the X. > + } > + else > + result.append (line); > + > + line = result; > + return; > +} > + > +/* Converts an rtx from compact syntax to normal syntax if possible. */ > + > +static void > +convert_syntax (rtx x, file_location loc) > +{ > + int alt_no; > + unsigned int index, templ_index; > + const char *templ; > + vec_conlist tconvec, convec, attrvec; > + > + templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2; > + > + templ = XTMPL (x, templ_index); > + > + /* Templates with constraints start with "{@". */ > + if (strncmp ("*{@", templ, 3)) > + return; > + > + /* Get the layout for the template. */ > + templ += 3; > + skip_spaces (&templ); > + > + if (!expect_char (&templ, '[')) > + fatal_at (loc, "expecing `[' to begin section list"); > + > + parse_section_layout (&templ, "cons:", tconvec, true); > + convec.resize (tconvec.size ()); > + > + /* Check for any duplicate cons entries and sort based on i. */ > + for (unsigned i = 0; i < tconvec.size (); i++) > + { > + int idx = tconvec[i].idx; > + if (convec[idx].idx >= 0) > + fatal_at (loc, "duplicate cons number found: %d", idx); > + convec[idx] = tconvec[i]; > + } > + tconvec.clear (); "convec.resize (tconvec.size ());" isn't guaranteed to be enough if the cons: skips operands. Either we need to calculate the maximum idx first, or we need to grow convec on demand. > + > + Nit: excess whitespace > + if (*templ != ']') > + { > + if (*templ == ';') > + skip_spaces (&(++templ)); > + parse_section_layout (&templ, "attrs:", attrvec, false); > + create_missing_attributes (x, loc, attrvec); > + } > + > + if (!expect_char (&templ, ']')) > + { > + fatal_at (loc, "expecting `]` to end section list - section list " > + "must have cons first, attrs second"); > + } Formatting nit: unnecessary braces > + > + /* We will write the un-constrainified template into new_templ. */ > + std::string new_templ; > + new_templ.append ("@"); > + > + /* Skip to the first proper line. */ > + while (*templ++ != '\n'); This seems to allow anything to follow the "]". Should we instead use skip_spaces and then require a '\n'? > + alt_no = 0; > + > + std::string last_line; > + > + /* Process the alternatives. */ > + while (*(templ - 1) != '\0') > + { > + /* Copy leading whitespace. */ > + std::string buffer; > + while (*templ == ' ' || *templ == '\t') > + buffer += *templ++; Why do we need to do that? The '@' handling in genoutput.cc seems to skip whatever space is present. I was wondering if it was so that column numbers matched in compiler error messages against "<<" lines, but those would already be off because of the "* return" transformation (not an issue that needs to be fixed). > + > + /* Check if we're at the end. */ > + if (templ[0] == '}' && templ[1] == '\0') > + break; > + > + new_templ += '\n'; > + new_templ.append (buffer); > + > + if (expect_char (&templ, '[')) > + { > + /* Parse the constraint list, then the attribute list. */ > + if (convec.size () > 0) > + parse_section (&templ, convec.size (), alt_no, convec, loc, > + "constraint"); > + > + if (attrvec.size () > 0) > + { > + if (convec.size () > 0 && !expect_char (&templ, ';')) > + fatal_at (loc, "expected `;' to separate constraints " > + "and attributes in alternative %d", alt_no); > + > + parse_section (&templ, attrvec.size (), alt_no, > + attrvec, loc, "attribute"); > + } > + > + if (!expect_char (&templ, ']')) > + fatal_at (loc, "expected end of constraint/attribute list but " > + "missing an ending `]' in alternative %d", alt_no); > + } > + else if (templ[0] == '/' && templ[1] == '/') > + { > + templ+=2; Formatting: should be spaces around "+=". But here, and... > + /* Glob till newline or end of string. */ > + while (*templ != '\n' || *templ != '\0') > + templ++; > + } > + else if (templ[0] == '/' && templ[1] == '*') > + { > + templ+=2; > + /* Glob till newline or end of multiline comment. */ > + while (templ[0] != '*' && templ[1] != '/') > + templ++; > + templ++; ...especially here, I think we should instead completely skip lines with comments and then "continue", without adding anything to new_templ for that iteration of the loop. That would ensure that: (a) multi-line // comments work correctly (b) a comment at the end gets silently dropped without adding a line to the new template > + } > + else > + fatal_at (loc, "expected constraint/attribute list at beginning of " > + "alternative %d but missing a starting `['", alt_no); > + > + /* Skip whitespace between list and asm. */ > + ++templ; > + skip_spaces (&templ); > + > + /* Copy asm to new template. */ > + std::string line; > + while (*templ != '\n' && *templ != '\0') > + line += *templ++; > + > + /* Apply any pre-processing needed to the line. */ > + preprocess_compact_syntax (loc, alt_no, line, last_line); > + new_templ.append (line); > + last_line = line; > + > + /* The processing is very sensitive to whitespace, so preserve > + all but the trailing ones. */ > + if (templ[0] == '\n') > + *templ++; Is the point here that we allow the closing "}" to be on its own line? It might be worth calling that out explicitly if so. In other words, I'd understood this to mean something like: /* Normal "*..." syntax expects the closing quote to be on the final line of asm, whereas we allow the closing "}" to be on its own line. Postpone copying the '\n' until we know that there is another alternative in the list. */ > + ++alt_no; > + } > + > + /* Write the constraints and attributes into their proper places. */ > + if (convec.size () > 0) > + { > + index = add_constraints (x, loc, 0, convec); > + if (index < convec.size ()) > + fatal_at (loc, "could not find match_operand/scratch with id %d", > + convec[index].idx); > + } > + > + if (attrvec.size () > 0) > + { > + index = add_attributes (x, loc, attrvec); > + if (index < attrvec.size ()) > + fatal_at (loc, "could not find set_attr for attribute %s", > + attrvec[index].name.c_str ()); > + } > + > + /* Copy over the new un-constrainified template. */ > + XTMPL (x, templ_index) = xstrdup (new_templ.c_str ()); > + > + /* Register for later checks during iterator expansions. */ > + compact_syntax.add (x); > + > +#if DEBUG > + print_rtl_single (stderr, x); > +#endif IMO it'd be better to drop this. It's easy enough to add locally if that's what someone wants. ("make mddump" would also be useful for debugging this.) Thanks, Richard > +} > + > /* Process a top level rtx in some way, queuing as appropriate. */ > > static void > @@ -553,10 +1121,12 @@ process_rtx (rtx desc, file_location loc) > switch (GET_CODE (desc)) > { > case DEFINE_INSN: > + convert_syntax (desc, loc); > queue_pattern (desc, &define_insn_tail, loc); > break; > > case DEFINE_COND_EXEC: > + convert_syntax (desc, loc); > queue_pattern (desc, &define_cond_exec_tail, loc); > break; > > @@ -631,6 +1201,7 @@ process_rtx (rtx desc, file_location loc) > attr = XVEC (desc, split_code + 1); > PUT_CODE (desc, DEFINE_INSN); > XVEC (desc, 4) = attr; > + convert_syntax (desc, loc); > > /* Queue them. */ > insn_elem = queue_pattern (desc, &define_insn_tail, loc);