This rearranges things so that we have a chance to optimize the bitpacking into a series of shifts and bit operations with constant operands. We still fail to fully optimize this for various reasons, but the "head" of streamer_write_tree_bitfields and streamer_read_tree_bitfields now looks sane.
LTO bootstrapped on x86_64-unknown-linux-gnu, applied. It should give us a few percent back in streaming performance. I am not sure whether we want to expose our tree-struct hierarchy more in this function: we do a quite nonsensical if-cascade in streamer_read_tree_bitfields, but keeping a better one in sync with tree_node_structure_for_code and tree_contains_struct is one more thing to remember when updating things there.

Richard.

2015-02-09  Richard Biener  <rguent...@suse.de>

	* tree-streamer.h (streamer_pack_tree_bitfields): Remove.
	(streamer_write_tree_bitfields): Declare.
	* tree-streamer-in.c (unpack_ts_base_value_fields): Inline, properly unpack padding.
	(unpack_value_fields): Inline ...
	(streamer_read_tree_bitfields): ... here.
	* tree-streamer-out.c (pack_ts_base_value_fields): Inline and properly add padding bits.
	(streamer_pack_tree_bitfields): Fold into ...
	(streamer_write_tree_bitfields): ... this new function, exposing the bitpack object.
	* lto-streamer-out.c (lto_write_tree_1): Call streamer_write_tree_bitfields.

Index: gcc/tree-streamer.h =================================================================== --- gcc/tree-streamer.h (revision 220535) +++ gcc/tree-streamer.h (working copy) @@ -85,8 +85,7 @@ void streamer_write_string_cst (struct o struct lto_output_stream *, tree); void streamer_write_chain (struct output_block *, tree, bool); void streamer_write_tree_header (struct output_block *, tree); -void streamer_pack_tree_bitfields (struct output_block *, struct bitpack_d *, - tree); +void streamer_write_tree_bitfields (struct output_block *, tree); void streamer_write_tree_body (struct output_block *, tree, bool); void streamer_write_integer_cst (struct output_block *, tree, bool); void streamer_write_builtin (struct output_block *, tree); Index: gcc/tree-streamer-in.c =================================================================== --- gcc/tree-streamer-in.c (revision 220535) +++ gcc/tree-streamer-in.c (working copy) @@ -123,7 +123,7 @@ streamer_read_chain (struct lto_input_bl /* Unpack all the non-pointer fields of the TS_BASE structure of expression EXPR from bitpack BP. 
*/ -static void +static inline void unpack_ts_base_value_fields (struct bitpack_d *bp, tree expr) { /* Note that the code for EXPR has already been unpacked to create EXPR in @@ -158,6 +158,8 @@ unpack_ts_base_value_fields (struct bitp TREE_STATIC (expr) = (unsigned) bp_unpack_value (bp, 1); if (TREE_CODE (expr) != TREE_BINFO) TREE_PRIVATE (expr) = (unsigned) bp_unpack_value (bp, 1); + else + bp_unpack_value (bp, 1); TREE_PROTECTED (expr) = (unsigned) bp_unpack_value (bp, 1); TREE_DEPRECATED (expr) = (unsigned) bp_unpack_value (bp, 1); if (TYPE_P (expr)) @@ -166,9 +168,12 @@ unpack_ts_base_value_fields (struct bitp TYPE_ADDR_SPACE (expr) = (unsigned) bp_unpack_value (bp, 8); } else if (TREE_CODE (expr) == SSA_NAME) - SSA_NAME_IS_DEFAULT_DEF (expr) = (unsigned) bp_unpack_value (bp, 1); + { + SSA_NAME_IS_DEFAULT_DEF (expr) = (unsigned) bp_unpack_value (bp, 1); + bp_unpack_value (bp, 8); + } else - bp_unpack_value (bp, 1); + bp_unpack_value (bp, 9); } @@ -456,115 +461,102 @@ unpack_ts_omp_clause_value_fields (struc } } -/* Unpack all the non-pointer fields in EXPR into a bit pack. */ -static void -unpack_value_fields (struct data_in *data_in, struct bitpack_d *bp, tree expr) +/* Read all the language-independent bitfield values for EXPR from IB. + Return the partially unpacked bitpack so the caller can unpack any other + bitfield values that the writer may have written. */ + +struct bitpack_d +streamer_read_tree_bitfields (struct lto_input_block *ib, + struct data_in *data_in, tree expr) { enum tree_code code; + struct bitpack_d bp; + + /* Read the bitpack of non-pointer values from IB. */ + bp = streamer_read_bitpack (ib); - code = TREE_CODE (expr); + /* The first word in BP contains the code of the tree that we + are about to read. 
*/ + code = (enum tree_code) bp_unpack_value (&bp, 16); + lto_tag_check (lto_tree_code_to_tag (code), + lto_tree_code_to_tag (TREE_CODE (expr))); /* Note that all these functions are highly sensitive to changes in the types and sizes of each of the fields being packed. */ - unpack_ts_base_value_fields (bp, expr); + unpack_ts_base_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) - unpack_ts_int_cst_value_fields (bp, expr); + unpack_ts_int_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) - unpack_ts_real_cst_value_fields (bp, expr); + unpack_ts_real_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) - unpack_ts_fixed_cst_value_fields (bp, expr); + unpack_ts_fixed_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) - DECL_SOURCE_LOCATION (expr) = stream_input_location (bp, data_in); + DECL_SOURCE_LOCATION (expr) = stream_input_location (&bp, data_in); if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) - unpack_ts_decl_common_value_fields (bp, expr); + unpack_ts_decl_common_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) - unpack_ts_decl_wrtl_value_fields (bp, expr); + unpack_ts_decl_wrtl_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) - unpack_ts_decl_with_vis_value_fields (bp, expr); + unpack_ts_decl_with_vis_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) - unpack_ts_function_decl_value_fields (bp, expr); + unpack_ts_function_decl_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) - unpack_ts_type_common_value_fields (bp, expr); + unpack_ts_type_common_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_EXP)) { - SET_EXPR_LOCATION (expr, stream_input_location (bp, data_in)); + SET_EXPR_LOCATION (expr, stream_input_location (&bp, data_in)); if (code == MEM_REF || code == TARGET_MEM_REF) { MR_DEPENDENCE_CLIQUE (expr) - = (unsigned)bp_unpack_value (bp, 
sizeof (short) * 8); + = (unsigned)bp_unpack_value (&bp, sizeof (short) * 8); if (MR_DEPENDENCE_CLIQUE (expr) != 0) MR_DEPENDENCE_BASE (expr) - = (unsigned)bp_unpack_value (bp, sizeof (short) * 8); + = (unsigned)bp_unpack_value (&bp, sizeof (short) * 8); } } if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) - unpack_ts_block_value_fields (data_in, bp, expr); + unpack_ts_block_value_fields (data_in, &bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) - unpack_ts_translation_unit_decl_value_fields (data_in, bp, expr); + unpack_ts_translation_unit_decl_value_fields (data_in, &bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) - cl_optimization_stream_in (bp, TREE_OPTIMIZATION (expr)); + cl_optimization_stream_in (&bp, TREE_OPTIMIZATION (expr)); if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) { - unsigned HOST_WIDE_INT length = bp_unpack_var_len_unsigned (bp); + unsigned HOST_WIDE_INT length = bp_unpack_var_len_unsigned (&bp); if (length > 0) vec_safe_grow (BINFO_BASE_ACCESSES (expr), length); } if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) { - unsigned HOST_WIDE_INT length = bp_unpack_var_len_unsigned (bp); + unsigned HOST_WIDE_INT length = bp_unpack_var_len_unsigned (&bp); if (length > 0) vec_safe_grow (CONSTRUCTOR_ELTS (expr), length); } #ifndef ACCEL_COMPILER if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) - cl_target_option_stream_in (data_in, bp, TREE_TARGET_OPTION (expr)); + cl_target_option_stream_in (data_in, &bp, TREE_TARGET_OPTION (expr)); #endif if (code == OMP_CLAUSE) - unpack_ts_omp_clause_value_fields (data_in, bp, expr); -} - - -/* Read all the language-independent bitfield values for EXPR from IB. - Return the partially unpacked bitpack so the caller can unpack any other - bitfield values that the writer may have written. 
*/ - -struct bitpack_d -streamer_read_tree_bitfields (struct lto_input_block *ib, - struct data_in *data_in, tree expr) -{ - enum tree_code code; - struct bitpack_d bp; - - /* Read the bitpack of non-pointer values from IB. */ - bp = streamer_read_bitpack (ib); - - /* The first word in BP contains the code of the tree that we - are about to read. */ - code = (enum tree_code) bp_unpack_value (&bp, 16); - lto_tag_check (lto_tree_code_to_tag (code), - lto_tree_code_to_tag (TREE_CODE (expr))); - - /* Unpack all the value fields from BP. */ - unpack_value_fields (data_in, &bp, expr); + unpack_ts_omp_clause_value_fields (data_in, &bp, expr); return bp; } Index: gcc/tree-streamer-out.c =================================================================== --- gcc/tree-streamer-out.c (revision 220535) +++ gcc/tree-streamer-out.c (working copy) @@ -92,7 +92,7 @@ write_identifier (struct output_block *o /* Pack all the non-pointer fields of the TS_BASE structure of expression EXPR into bitpack BP. */ -static void +static inline void pack_ts_base_value_fields (struct bitpack_d *bp, tree expr) { bp_pack_value (bp, TREE_CODE (expr), 16); @@ -129,6 +129,8 @@ pack_ts_base_value_fields (struct bitpac bp_pack_value (bp, TREE_STATIC (expr), 1); if (TREE_CODE (expr) != TREE_BINFO) bp_pack_value (bp, TREE_PRIVATE (expr), 1); + else + bp_pack_value (bp, 0, 1); bp_pack_value (bp, TREE_PROTECTED (expr), 1); bp_pack_value (bp, TREE_DEPRECATED (expr), 1); if (TYPE_P (expr)) @@ -137,9 +139,12 @@ pack_ts_base_value_fields (struct bitpac bp_pack_value (bp, TYPE_ADDR_SPACE (expr), 8); } else if (TREE_CODE (expr) == SSA_NAME) - bp_pack_value (bp, SSA_NAME_IS_DEFAULT_DEF (expr), 1); + { + bp_pack_value (bp, SSA_NAME_IS_DEFAULT_DEF (expr), 1); + bp_pack_value (bp, 0, 8); + } else - bp_pack_value (bp, 0, 1); + bp_pack_value (bp, 0, 9); } @@ -417,78 +422,80 @@ pack_ts_omp_clause_value_fields (struct /* Pack all the bitfields in EXPR into a bit pack. 
*/ void -streamer_pack_tree_bitfields (struct output_block *ob, - struct bitpack_d *bp, tree expr) +streamer_write_tree_bitfields (struct output_block *ob, tree expr) { + bitpack_d bp = bitpack_create (ob->main_stream); enum tree_code code; code = TREE_CODE (expr); /* Note that all these functions are highly sensitive to changes in the types and sizes of each of the fields being packed. */ - pack_ts_base_value_fields (bp, expr); + pack_ts_base_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) - pack_ts_int_cst_value_fields (bp, expr); + pack_ts_int_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) - pack_ts_real_cst_value_fields (bp, expr); + pack_ts_real_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) - pack_ts_fixed_cst_value_fields (bp, expr); + pack_ts_fixed_cst_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) - stream_output_location (ob, bp, DECL_SOURCE_LOCATION (expr)); + stream_output_location (ob, &bp, DECL_SOURCE_LOCATION (expr)); if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) - pack_ts_decl_common_value_fields (bp, expr); + pack_ts_decl_common_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) - pack_ts_decl_wrtl_value_fields (bp, expr); + pack_ts_decl_wrtl_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) - pack_ts_decl_with_vis_value_fields (bp, expr); + pack_ts_decl_with_vis_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) - pack_ts_function_decl_value_fields (bp, expr); + pack_ts_function_decl_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) - pack_ts_type_common_value_fields (bp, expr); + pack_ts_type_common_value_fields (&bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_EXP)) { - stream_output_location (ob, bp, EXPR_LOCATION (expr)); + stream_output_location (ob, &bp, EXPR_LOCATION (expr)); if (code == MEM_REF || code == TARGET_MEM_REF) { - 
bp_pack_value (bp, MR_DEPENDENCE_CLIQUE (expr), sizeof (short) * 8); + bp_pack_value (&bp, MR_DEPENDENCE_CLIQUE (expr), sizeof (short) * 8); if (MR_DEPENDENCE_CLIQUE (expr) != 0) - bp_pack_value (bp, MR_DEPENDENCE_BASE (expr), sizeof (short) * 8); + bp_pack_value (&bp, MR_DEPENDENCE_BASE (expr), sizeof (short) * 8); } } if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) - pack_ts_block_value_fields (ob, bp, expr); + pack_ts_block_value_fields (ob, &bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) - pack_ts_translation_unit_decl_value_fields (ob, bp, expr); + pack_ts_translation_unit_decl_value_fields (ob, &bp, expr); if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) - cl_optimization_stream_out (bp, TREE_OPTIMIZATION (expr)); + cl_optimization_stream_out (&bp, TREE_OPTIMIZATION (expr)); if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) - bp_pack_var_len_unsigned (bp, vec_safe_length (BINFO_BASE_ACCESSES (expr))); + bp_pack_var_len_unsigned (&bp, vec_safe_length (BINFO_BASE_ACCESSES (expr))); if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) - bp_pack_var_len_unsigned (bp, CONSTRUCTOR_NELTS (expr)); + bp_pack_var_len_unsigned (&bp, CONSTRUCTOR_NELTS (expr)); if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION) /* Don't stream these when passing things to a different target. */ && !lto_stream_offload_p) - cl_target_option_stream_out (ob, bp, TREE_TARGET_OPTION (expr)); + cl_target_option_stream_out (ob, &bp, TREE_TARGET_OPTION (expr)); if (code == OMP_CLAUSE) - pack_ts_omp_clause_value_fields (ob, bp, expr); + pack_ts_omp_clause_value_fields (ob, &bp, expr); + + streamer_write_bitpack (&bp); } Index: gcc/lto-streamer-out.c =================================================================== --- gcc/lto-streamer-out.c (revision 220535) +++ gcc/lto-streamer-out.c (working copy) @@ -385,9 +385,7 @@ lto_write_tree_1 (struct output_block *o { /* Pack all the non-pointer fields in EXPR into a bitpack and write the resulting bitpack. 
*/ - bitpack_d bp = bitpack_create (ob->main_stream); - streamer_pack_tree_bitfields (ob, &bp, expr); - streamer_write_bitpack (&bp); + streamer_write_tree_bitfields (ob, expr); /* Write all the pointer fields in EXPR. */ streamer_write_tree_body (ob, expr, ref_p);