[PATCH v13 16/40] c, c++: Use 16 bits for all use of enum rid for more keyword space

Ken Matsui via Gcc-patches Thu, 14 Sep 2023 19:40:08 -0700

Now that RID_MAX has reached 255, we need to update the bit sizes of every
use of the enum rid from 8 to 16 to support more keywords.


For struct token_indent_info, the 8-bit increase does not change the overall
struct size because the 8-bit just consumes 1 byte from 2 bytes of external
fragmentation.  Since reordering the fields just changes 1 byte of internal
fragmentation to 1 byte of external fragmentation, I keep the original field
order.

For struct c_token, the 8-bit expansion increased the overall struct size from
24 bytes to 32 bytes.  The original struct takes 4 bytes of internal
fragmentation (after the location field) and 3 bytes of external
fragmentation.  Keeping the original order with the 8-bit expansion gives
7 bytes of internal fragmentation (3 bytes after the pragma_kind field + 4
bytes after the location field) and 7 bytes of external fragmentation. Since
the original field order was not optimal, reordering the fields results in the
same overall size as the original one.  I updated the field order to the most
efficient order.

For struct cp_token, reordering the fields only minimizes internal
fragmentation and does not minimize the overall struct size.  I keep the
original field order. The original struct size was 16 bytes with 3 bits of
internal fragmentation.  With this 8-bit update, the overall size would be
24 bytes.  Since there is no external fragmentation and 7 bytes + 3 bits of
internal fragmentation, reordering the fields does not minimize the overall
size.  I keep the orignal field order.

Suppose a pointer takes 8 bytes and int takes 4 bytes. Then, struct
ht_identifier takes 16 bytes, and union _cpp_hashnode_value takes 8 bytes.
For struct cpp_hashnode, the 8-bit increase consumes 1 more byte, resulting in
33 bytes except for paddings.  The original overall size before the 8-bit
increase was 32 bytes.  However, due to fragmentation, the overall struct size
would be 40 bytes.  Since there is no external fragmentation and 3 bytes + 5
bits of internal fragmentation, reordering the fields does not minimize the
overall size.  I keep the original field order.

gcc/c-family/ChangeLog:

        * c-indentation.h (struct token_indent_info): Make keyword 16 bits.

gcc/c/ChangeLog:

        * c-parser.cc (c_parse_init): Handle RID_MAX not to exceed the max
        value of 16 bits.
        * c-parser.h (struct c_token): Make keyword 16 bits. Reorder the
        fields to minimize memory fragmentation.

gcc/cp/ChangeLog:

        * parser.h (struct cp_token): Make keyword 16 bits.
        (struct cp_lexer): Make saved_keyword 16 bits.

libcpp/ChangeLog:

        * include/cpplib.h (struct cpp_hashnode): Make rid_code 16 bits.

Signed-off-by: Ken Matsui <kmat...@gcc.gnu.org>
---
 gcc/c-family/c-indentation.h |  2 +-
 gcc/c/c-parser.cc            |  6 +++---
 gcc/c/c-parser.h             | 14 +++++++-------
 gcc/cp/parser.h              |  8 +++++---
 libcpp/include/cpplib.h      |  7 +++++--
 5 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/gcc/c-family/c-indentation.h b/gcc/c-family/c-indentation.h
index c0e07bf49f1..6d2b88f01a3 100644
--- a/gcc/c-family/c-indentation.h
+++ b/gcc/c-family/c-indentation.h
@@ -26,7 +26,7 @@ struct token_indent_info
 {
   location_t location;
   ENUM_BITFIELD (cpp_ttype) type : 8;
-  ENUM_BITFIELD (rid) keyword : 8;
+  ENUM_BITFIELD (rid) keyword : 16;
 };
 
 /* Extract token information from TOKEN, which ought to either be a
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index b9a1b75ca43..2086f253923 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -115,9 +115,9 @@ c_parse_init (void)
   tree id;
   int mask = 0;
 
-  /* Make sure RID_MAX hasn't grown past the 8 bits used to hold the keyword in
-     the c_token structure.  */
-  gcc_assert (RID_MAX <= 255);
+  /* Make sure RID_MAX hasn't grown past the 16 bits used to hold the keyword
+     in the c_token structure.  */
+  gcc_assert (RID_MAX <= 65535);
 
   mask |= D_CXXONLY;
   if (!flag_isoc99)
diff --git a/gcc/c/c-parser.h b/gcc/c/c-parser.h
index 545f0f4d9eb..6a9bd22a793 100644
--- a/gcc/c/c-parser.h
+++ b/gcc/c/c-parser.h
@@ -51,21 +51,21 @@ enum c_id_kind {
 /* A single C token after string literal concatenation and conversion
    of preprocessing tokens to tokens.  */
 struct GTY (()) c_token {
+  /* The value associated with this token, if any.  */
+  tree value;
+  /* The location at which this token was found.  */
+  location_t location;
+  /* If this token is a keyword, this value indicates which keyword.
+     Otherwise, this value is RID_MAX.  */
+  ENUM_BITFIELD (rid) keyword : 16;
   /* The kind of token.  */
   ENUM_BITFIELD (cpp_ttype) type : 8;
   /* If this token is a CPP_NAME, this value indicates whether also
      declared as some kind of type.  Otherwise, it is C_ID_NONE.  */
   ENUM_BITFIELD (c_id_kind) id_kind : 8;
-  /* If this token is a keyword, this value indicates which keyword.
-     Otherwise, this value is RID_MAX.  */
-  ENUM_BITFIELD (rid) keyword : 8;
   /* If this token is a CPP_PRAGMA, this indicates the pragma that
      was seen.  Otherwise it is PRAGMA_NONE.  */
   ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
-  /* The location at which this token was found.  */
-  location_t location;
-  /* The value associated with this token, if any.  */
-  tree value;
   /* Token flags.  */
   unsigned char flags;
 
diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h
index 6cbb9a8e031..7aa251d11b1 100644
--- a/gcc/cp/parser.h
+++ b/gcc/cp/parser.h
@@ -44,7 +44,7 @@ struct GTY (()) cp_token {
   enum cpp_ttype type : 8;
   /* If this token is a keyword, this value indicates which keyword.
      Otherwise, this value is RID_MAX.  */
-  enum rid keyword : 8;
+  enum rid keyword : 16;
   /* Token flags.  */
   unsigned char flags;
   /* True if this token is from a context where it is implicitly extern "C" */
@@ -59,7 +59,9 @@ struct GTY (()) cp_token {
   bool purged_p : 1;
   bool tree_check_p : 1;
   bool main_source_p : 1;
-  /* 3 unused bits.  */
+  /* These booleans use 5 bits within 1 byte, resulting in 3 unused bits.
+     Since there would be 3 bytes of internal fragmentation to the location
+     field, the total unused bits would be 27 (= 3 + 24).  */
 
   /* The location at which this token was found.  */
   location_t location;
@@ -102,7 +104,7 @@ struct GTY (()) cp_lexer {
 
   /* Saved pieces of end token we replaced with the eof token.  */
   enum cpp_ttype saved_type : 8;
-  enum rid saved_keyword : 8;
+  enum rid saved_keyword : 16;
 
   /* The next lexer in a linked list of lexers.  */
   struct cp_lexer *next;
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index fcdaf082b09..7c37b861a77 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -988,11 +988,14 @@ struct GTY(()) cpp_hashnode {
   unsigned int directive_index : 7;    /* If is_directive,
                                           then index into directive table.
                                           Otherwise, a NODE_OPERATOR.  */
-  unsigned int rid_code : 8;           /* Rid code - for front ends.  */
+  unsigned int rid_code : 16;          /* Rid code - for front ends.  */
   unsigned int flags : 9;              /* CPP flags.  */
   ENUM_BITFIELD(node_type) type : 2;   /* CPP node type.  */
 
-  /* 5 bits spare.  */
+  /* These bitfields use 35 bits (= 1 + 7 + 16 + 9 + 2).  The exceeded 3 bits
+     in terms of bytes leave 5 unused bits within 1 byte.  Since there would
+     be 3 bytes of internal fragmentation to the deferred field, the total
+     unused bits would be 29 (= 5 + 24).  */
 
   /* The deferred cookie is applicable to NT_USER_MACRO or NT_VOID.
      The latter for when a macro had a prevailing undef.
-- 
2.42.0

[PATCH v13 16/40] c, c++: Use 16 bits for all use of enum rid for more keyword space

Reply via email to