Hello Michael,

Thanks for this lead! It is almost exactly what I need. I do have one more question, though: it seems that the type trees obtained via FOREACH_FUNCTION_ARGS and via TREE_TYPE are different pointers when compiling with -flto.

What do I mean by this? Consider the following code:

#include <stdio.h>
int main(){
  FILE *f = fopen("hello.txt", "w");
  fclose(f);
  return 0;
}

The trees corresponding to types FILE* and FILE obtained via the variable f are different from the trees obtained from the argument to fclose.

Let's say that we have a gcc pass with the following global variables:

tree _local_file_ptr_type;
tree _local_file_type;
tree _glibc_file_ptr_type;
tree _glibc_file_type;

These variables will hold the trees that correspond to types:

* FILE* and FILE, obtained via TREE_TYPE(f) (the _local_* variables)
* FILE* and FILE, obtained via FOREACH_FUNCTION_ARGS(TREE_TYPE(fclose_decl), t, iter) (the _glibc_* variables)

These variables will be compared using pointer equality. To inspect them, we print the pointer value stored in each variable:

log("pointers %p =?= %p\n", _local_file_ptr_type, _glibc_file_ptr_type);
log("records %p =?= %p\n", _local_file_type, _glibc_file_type);

When the simple C program is compiled via:
/path/to/gcc a.c -fdump-ipa-hello-world -fipa-hello-world
we see that the pointers are the same.

pointers 0xffff7a8dcb70 =?= 0xffff7a8dcb70
records 0xffff7a8dbfa0 =?= 0xffff7a8dbfa0

However, when we are compiling the simple C program via
/path/to/gcc -flto a.c -fdump-ipa-hello-world -fipa-hello-world
/path/to/gcc -flto -flto-partition=none -fipa-hello-world a.c -o a.out
one can see that the pointers are different:

pointers 0xffff79ee1c38 =?= 0xffff79ee0b28
records 0xffff79ee1b90 =?= 0xffff79ee0a80

Do you, or anyone else for that matter, know if it would be possible to keep the trees pointing to the same address? Or, if that is only possible with some modifications, where could I start looking in the source code to make these addresses match? The alternative for me would be to write my own type comparison function, which is something I can do, but I wanted to ask about this first.

Here is the patch necessary for running this hello world pass. The interesting part of the code is in ipa-hello-world.c. The utils file is only used for printing out a human-readable name.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index fa9923bb270..1c0fef5c8a2 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1399,6 +1399,8 @@ OBJS = \
        incpath.o \
        init-regs.o \
        internal-fn.o \
+       ipa-hello-world.o \
+       ipa-str-reorg-utils.o \
        ipa-cp.o \
        ipa-sra.o \
        ipa-devirt.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 4368910cb54..d61498d722c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3392,4 +3392,8 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.

+fipa-hello-world
+Common Report Var(flag_ipa_hello_world) Optimization
+TBD
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ipa-hello-world.c b/gcc/ipa-hello-world.c
new file mode 100644
index 00000000000..41cab07c357
--- /dev/null
+++ b/gcc/ipa-hello-world.c
@@ -0,0 +1,192 @@
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple-expr.h"
+#include "predict.h"
+#include "alloc-pool.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "diagnostic.h"
+#include "fold-const.h"
+#include "gimple-fold.h"
+#include "symbol-summary.h"
+#include "tree-vrp.h"
+#include "ipa-prop.h"
+#include "tree-pretty-print.h"
+#include "tree-inline.h"
+#include "ipa-fnsummary.h"
+#include "ipa-utils.h"
+#include "tree-ssa-ccp.h"
+#include "stringpool.h"
+#include "attribs.h"
+
+
+#include <set>
+#include <stdarg.h>
+
+#include "ipa-str-reorg-utils.h"
+
+inline static void
+log(const char* format, ...)
+{
+  // No-op without a dump file; test before va_start so it pairs with va_end.
+  if (!dump_file) return;
+  va_list args;
+  va_start(args, format);
+  vfprintf(dump_file, format, args);
+  va_end(args);
+}
+
+inline static void
+log_decl(tree decl)
+{
+  if (!dump_file) return;
+
+  print_generic_decl (dump_file, decl, TDF_DETAILS);
+}
+
+tree _local_file_type;
+tree _local_file_ptr_type;
+tree _glibc_file_ptr_type;
+tree _glibc_file_type;
+
+static void
+find_local_file_type_locals(tree var_decl)
+{
+  // Records the type of the local named "f" (and the type it points to)
+  // in _local_file_ptr_type / _local_file_type; other locals are ignored.
+  gcc_assert (var_decl);
+  tree identifier_node = DECL_NAME(var_decl);
+  if (!identifier_node) return;
+
+  const char* ob_identifier_ptr = IDENTIFIER_POINTER(identifier_node);
+  gcc_assert(ob_identifier_ptr);
+  const char* ex_identifier_ptr = "f";
+ const bool is_same_identifier = strcmp(ex_identifier_ptr, ob_identifier_ptr) == 0;
+  if (!is_same_identifier) return;
+
+  _local_file_ptr_type = TREE_TYPE(var_decl);
+  gcc_assert(_local_file_ptr_type);
+  _local_file_type = TREE_TYPE(_local_file_ptr_type);
+  gcc_assert(_local_file_type);
+}
+
+static void
+find_local_file_type_main(const cgraph_node *cnode)
+{
+  gcc_assert (cnode);
+  tree decl = cnode->decl;
+  gcc_assert (decl);
+  const enum tree_code code = TREE_CODE(decl);
+  const bool is_function_decl = FUNCTION_DECL == code;
+  gcc_assert (is_function_decl);
+  function *func = DECL_STRUCT_FUNCTION (decl);
+  gcc_assert (func);
+  tree var_decl = NULL;
+  int i = 0;
+  FOR_EACH_LOCAL_DECL (func, i, var_decl)
+  {
+    find_local_file_type_locals(var_decl);
+  }
+}
+
+static void
+find_local_file_type_function()
+{
+  cgraph_node *cnode = NULL;
+  FOR_EACH_DEFINED_FUNCTION (cnode)
+  {
+    cnode->get_untransformed_body ();
+    const char* ob_func_name = cnode->name();
+    const char* ex_func_name = "main";
+    const bool same_name = strcmp(ob_func_name, ex_func_name) == 0;
+    if (!same_name) continue;
+    find_local_file_type_main(cnode);
+
+  }
+}
+
+static void
+find_glibc_file_type_fclose(const cgraph_node *cnode)
+{
+  gcc_assert(cnode);
+  tree decl = cnode->decl;
+  gcc_assert(decl);
+  const enum tree_code code = TREE_CODE(decl);
+  const bool is_function_decl = FUNCTION_DECL == code;
+  gcc_assert (is_function_decl);
+  tree type = TREE_TYPE(decl);
+  gcc_assert(type);
+  function_args_iterator iter;
+  tree t;
+  FOREACH_FUNCTION_ARGS(type, t, iter)
+  {
+    const char* ob_type_name = get_type_name(t);
+    const char* ex_type_name = "FILE*";
+    const bool is_same_name = strcmp(ex_type_name, ob_type_name) == 0;
+    if (!is_same_name) continue;
+
+    _glibc_file_ptr_type = t;
+    gcc_assert (_glibc_file_ptr_type);
+    _glibc_file_type = TREE_TYPE(t);
+    gcc_assert (_glibc_file_type);
+  }
+}
+
+static void
+find_glibc_file_type_function()
+{
+  cgraph_node *cnode = NULL;
+  FOR_EACH_FUNCTION (cnode)
+  {
+    const char* ob_func_name = cnode->name();
+    const char* ex_func_name = "fclose";
+    const bool same_name = strcmp(ob_func_name, ex_func_name) == 0;
+    if (!same_name) continue;
+    find_glibc_file_type_fclose(cnode);
+  }
+}
+
+static unsigned int
+iphw_execute()
+{
+  find_local_file_type_function();
+  find_glibc_file_type_function();
+  log("pointers %p =?= %p\n", _local_file_ptr_type, _glibc_file_ptr_type);
+  log("records %p =?= %p\n", _local_file_type, _glibc_file_type);
+  return 0;
+}
+
+namespace {
+const pass_data pass_data_ipa_hello_world =
+{
+  SIMPLE_IPA_PASS,
+  "hello-world",
+  OPTGROUP_NONE,
+  TV_NONE,
+  (PROP_cfg | PROP_ssa),
+  0,
+  0,
+  0,
+  0,
+};
+
+class pass_ipa_hello_world : public simple_ipa_opt_pass
+{
+public:
+  pass_ipa_hello_world (gcc::context *ctx)
+    : simple_ipa_opt_pass(pass_data_ipa_hello_world, ctx)
+  {}
+
+  virtual bool gate(function*) { return flag_ipa_hello_world; }
+  virtual unsigned execute (function*) { return iphw_execute(); }
+};
+} // anon namespace
+
+simple_ipa_opt_pass*
+make_pass_ipa_hello_world (gcc::context *ctx)
+{
+  return new pass_ipa_hello_world (ctx);
+}
diff --git a/gcc/ipa-str-reorg-utils.c b/gcc/ipa-str-reorg-utils.c
new file mode 100644
index 00000000000..af2ec906911
--- /dev/null
+++ b/gcc/ipa-str-reorg-utils.c
@@ -0,0 +1,285 @@
+#include "ipa-str-reorg-utils.h"
+
+// This really should be inaccessible to anyone.
+const_tree
+get_base_type_from_ptr_or_arr_type (const_tree old_pointer_type,
+                                   const_tree pointer_type,
+                                   unsigned int &indirection_level)
+{
+  if (pointer_type == NULL)
+    {
+      gcc_assert (TREE_CODE (old_pointer_type) != POINTER_TYPE);
+      gcc_assert (TREE_CODE (old_pointer_type) != ARRAY_TYPE);
+      return old_pointer_type;
+    }
+  return get_base_type_from_ptr_or_arr_type (pointer_type,
+                                            TREE_TYPE (pointer_type),
+                                            ++indirection_level);
+}
+
+const_tree
+get_base_type_from_ptr_or_arr_type (const_tree ptr_or_array,
+                                   unsigned int &indirection_level)
+{
+  const bool is_array = TREE_CODE (ptr_or_array) == ARRAY_TYPE;
+  const bool is_ptr = TREE_CODE (ptr_or_array) == POINTER_TYPE;
+  const bool is_array_or_ptr = is_array || is_ptr;
+  gcc_assert (is_array_or_ptr);
+  indirection_level = 0;
+  return get_base_type_from_ptr_or_arr_type (ptr_or_array,
+                                            TREE_TYPE (ptr_or_array),
+                                            indirection_level);
+}
+
+const_tree
+get_base_type_from_array_type (const_tree array_type,
+                              unsigned int &indirection_level)
+{
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+ return get_base_type_from_ptr_or_arr_type (array_type, indirection_level);
+}
+
+const_tree
+get_base_type_from_array_type (const_tree array_type)
+{
+  gcc_assert (TREE_CODE (array_type) == ARRAY_TYPE);
+  unsigned int indirection_level;
+  return get_base_type_from_array_type (array_type, indirection_level);
+}
+
+const_tree
+get_base_type_from_pointer_type (const_tree pointer_type,
+                                unsigned int &indirection_level)
+{
+  gcc_assert (TREE_CODE (pointer_type) == POINTER_TYPE);
+ return get_base_type_from_ptr_or_arr_type (pointer_type, indirection_level);
+}
+
+const_tree
+get_base_type_from_pointer_type (const_tree pointer_type)
+{
+  gcc_assert (TREE_CODE (pointer_type) == POINTER_TYPE);
+  unsigned int indirection_level;
+  return get_base_type_from_pointer_type (pointer_type, indirection_level);
+}
+
+const_tree
+get_base_type (const_tree type)
+{
+  // Pointers and arrays defer to their helpers; other types return as-is.
+  switch (TREE_CODE (type))
+  {
+    case ARRAY_TYPE:
+      return get_base_type_from_array_type (type);
+    case POINTER_TYPE:
+      return get_base_type_from_pointer_type (type);
+    default:
+      return type;
+  }
+}
+const char *
+make_pointer_name (const_tree pointer)
+{
+  gcc_assert (TREE_CODE (pointer) == POINTER_TYPE);
+  unsigned int indirection_level;
+  const_tree base_type
+    = get_base_type_from_pointer_type (pointer, indirection_level);
+ const char *pointer_name = make_pointer_name (base_type, indirection_level);
+  return pointer_name;
+}
+
+const char *
+make_pointer_or_array_name (const char *base_type, const char *postfix)
+{
+  char *ptr;
+  int calculated_size = strlen (base_type) + strlen (postfix);
+  // TODO: Do not use asprintf?
+  // We'll let exit() deal with freeing this memory.
+  int retval = asprintf (&ptr, "%s%s", base_type, postfix);
+  gcc_assert (retval == calculated_size);
+  return ptr;
+}
+
+const char *
+make_array_postfix (unsigned int indirection_level)
+{
+  gcc_assert (indirection_level > 0);
+  static const char *max_indirection_level_str_array
+    = "[][][][][][][][][][][][][]";
+ static const size_t size_array = strlen (max_indirection_level_str_array);
+  static const size_t postfix_size_array = 2;
+  static const size_t max_indirection_level_array
+    = size_array / postfix_size_array;
+  gcc_assert (indirection_level < max_indirection_level_array);
+  return max_indirection_level_str_array + size_array
+        - (indirection_level * postfix_size_array);
+}
+
+const char *
+make_pointer_postfix (unsigned int indirection_level)
+{
+  gcc_assert (indirection_level > 0);
+  static const char *max_indirection_level_str_pointer
+    = "************************";
+ static const size_t size_pointer = strlen (max_indirection_level_str_pointer);
+  static const size_t postfix_size_pointer = 1;
+  static const size_t max_indirection_level_pointer
+    = size_pointer / postfix_size_pointer;
+  gcc_assert (indirection_level < max_indirection_level_pointer);
+  return max_indirection_level_str_pointer + size_pointer
+        - (indirection_level * postfix_size_pointer);
+}
+
+const char *
+make_pointer_name (const char *base_type_name,
+                  const unsigned int indirection_level)
+{
+  const char *postfix = make_pointer_postfix (indirection_level);
+  const char *ptr = make_pointer_or_array_name (base_type_name, postfix);
+  return ptr;
+}
+
+const char *
+make_pointer_name (const_tree base_type, const unsigned int indirection_level)
+{
+  const char *struct_name = get_type_name (base_type);
+  return make_pointer_name (struct_name, indirection_level);
+}
+
+
+const char *
+make_array_name (const char *base_type_name,
+                const unsigned int indirection_level)
+{
+  const char *postfix = make_array_postfix (indirection_level);
+  const char *ptr = make_pointer_or_array_name (base_type_name, postfix);
+  return ptr;
+}
+
+const char *
+make_array_name (const_tree base_type, const unsigned int indirection_level)
+{
+  const char *struct_name = get_type_name (base_type);
+  return make_array_name (struct_name, indirection_level);
+}
+
+// TODO: deal with anonymous structs.
+// Some records don't have identifier pointers
+const char *
+get_record_name (const_tree record)
+{
+  gcc_assert (record);
+  gcc_assert (TREE_CODE (record) == RECORD_TYPE);
+  tree name_tree = TYPE_NAME (record);
+  // The TYPE_NAME will be NULL_TREE for a type
+  // that is not a built-in type, the result of a typedef
+  // or a named class type.
+  // TODO: verify that we are never changing
+  // <name_tree_is_null> types
+  if (name_tree == NULL_TREE)
+    {
+      return "<name_tree_is_null>";
+    }
+
+  if (TREE_CODE (name_tree) == TYPE_DECL)
+    {
+      tree type_name = DECL_NAME (name_tree);
+      return IDENTIFIER_POINTER (type_name);
+    }
+  const char *identifier = IDENTIFIER_POINTER (name_tree);
+  gcc_assert (identifier);
+  return identifier;
+}
+
+const char *
+make_array_name (const_tree array)
+{
+  gcc_assert (TREE_CODE (array) == ARRAY_TYPE);
+  unsigned int indirection_level;
+  const_tree base_type
+    = get_base_type_from_array_type (array, indirection_level);
+  const char *array_name = make_array_name (base_type, indirection_level);
+  return array_name;
+}
+
+const char *
+get_array_name (const_tree array)
+{
+  gcc_assert (array);
+  gcc_assert (TREE_CODE (array) == ARRAY_TYPE);
+  // TYPE_NAME may be a TYPE_DECL (typedef) rather than an IDENTIFIER_NODE.
+  tree name = TYPE_NAME (array);
+  if (name && TREE_CODE (name) == TYPE_DECL) name = DECL_NAME (name);
+  if (name) return IDENTIFIER_POINTER (name);
+  return make_array_name (array);
+}
+
+const char *
+get_pointer_name (const_tree pointer)
+{
+  gcc_assert (pointer);
+  gcc_assert (TREE_CODE (pointer) == POINTER_TYPE);
+  // TYPE_NAME may be a TYPE_DECL (typedef) rather than an IDENTIFIER_NODE,
+  // so unwrap it the same way get_record_name does before reading the name.
+  tree name = TYPE_NAME (pointer);
+  if (name && TREE_CODE (name) == TYPE_DECL) name = DECL_NAME (name);
+  if (name) return IDENTIFIER_POINTER (name);
+  return make_pointer_name (pointer);
+}
+
+const char *
+make_reference_name (const_tree ref)
+{
+  gcc_assert (TREE_CODE (ref) == REFERENCE_TYPE);
+  const_tree base_type = TREE_TYPE (ref);
+  const char *old_name = get_type_name (base_type);
+  char *ptr;
+  static const char *prefix = "&";
+  static const char *suffix = ".reorg";
+  int new_size = strlen (prefix) + strlen (old_name) + strlen (suffix);
+  int retval = asprintf (&ptr, "%s%s%s", prefix, old_name, suffix);
+  gcc_assert (retval == new_size);
+  return ptr;
+}
+
+
+const char *
+get_reference_name (const_tree ref)
+{
+  gcc_assert (ref);
+  gcc_assert (TREE_CODE (ref) == REFERENCE_TYPE);
+  // TYPE_NAME may be a TYPE_DECL (typedef) rather than an IDENTIFIER_NODE,
+  // so unwrap it the same way get_record_name does before reading the name.
+  tree name = TYPE_NAME (ref);
+  if (name && TREE_CODE (name) == TYPE_DECL) name = DECL_NAME (name);
+  if (name) return IDENTIFIER_POINTER (name);
+  return make_reference_name (ref);
+}
+
+const char *
+get_type_name (const_tree type)
+{
+  enum tree_code code = TREE_CODE (type);
+  switch (code)
+    {
+    case ARRAY_TYPE:
+      return get_array_name (type);
+      break;
+    case POINTER_TYPE:
+      return get_pointer_name (type);
+      break;
+    case RECORD_TYPE:
+      return get_record_name (type);
+      break;
+    case REFERENCE_TYPE:
+      return get_reference_name (type);
+      break;
+    default:
+      // TODO: generalize even more?
+      // wait for experimental results to dictate what
+      // else we should specify.
+      return get_tree_code_name (code);
+      break;
+    }
+  return NULL;
+}
diff --git a/gcc/ipa-str-reorg-utils.h b/gcc/ipa-str-reorg-utils.h
new file mode 100644
index 00000000000..76f701ec74f
--- /dev/null
+++ b/gcc/ipa-str-reorg-utils.h
@@ -0,0 +1,36 @@
+#ifndef GCC_IPA_STR_REORG_UTILS_H
+#define GCC_IPA_STR_REORG_UTILS_H
+#pragma once
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+
+const char * get_type_name (const_tree type);
+const char * get_reference_name (const_tree ref);
+const char * make_reference_name (const_tree ref);
+const char * get_pointer_name (const_tree pointer);
+const char * get_array_name (const_tree array);
+const char * get_record_name (const_tree record);
+const char * make_array_name (const_tree base_type, const unsigned int indirection_level);
+const char * make_array_name (const char *base_type_name, const unsigned int indirection_level);
+const char * make_pointer_name (const_tree base_type, const unsigned int indirection_level);
+const char * make_pointer_name (const char *base_type_name, const unsigned int indirection_level);
+const char * make_pointer_postfix (unsigned int indirection_level);
+const char * make_array_postfix (unsigned int indirection_level);
+const char * make_array_name (const_tree array);
+const char * make_pointer_or_array_name (const char *base_type, const char *postfix);
+const char * make_pointer_name (const_tree pointer);
+const_tree get_base_type_from_ptr_or_arr_type (const_tree old_pointer_type, const_tree pointer_type, unsigned int &indirection_level);
+const_tree get_base_type_from_ptr_or_arr_type (const_tree ptr_or_array, unsigned int &indirection_level);
+const_tree get_base_type_from_array_type (const_tree array_type, unsigned int &indirection_level);
+const_tree get_base_type_from_array_type (const_tree array_type);
+const_tree get_base_type_from_pointer_type (const_tree pointer_type, unsigned int &indirection_level);
+const_tree get_base_type_from_pointer_type (const_tree pointer_type);
+const_tree get_base_type (const_tree type);
+
+
+
+#endif
+
diff --git a/gcc/passes.def b/gcc/passes.def
index 2bf2cb78fc5..66f333f81dc 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -149,6 +149,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_ipa_profile);
   NEXT_PASS (pass_ipa_icf);
   NEXT_PASS (pass_ipa_devirt);
+  NEXT_PASS (pass_ipa_hello_world);
   NEXT_PASS (pass_ipa_cp);
   NEXT_PASS (pass_ipa_sra);
   NEXT_PASS (pass_ipa_cdtor_merge);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a1207a20a3c..377dda689cc 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -501,6 +501,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_hello_world (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);


On 13/03/2020 16:15, Michael Matz wrote:
Hello,

On Fri, 13 Mar 2020, Erick Ochoa wrote:

+    for (tree parm = DECL_ARGUMENTS (undefined_function->decl); parm; parm =
DECL_CHAIN (parm))
+     {
+       tree type = TREE_TYPE(parm);
+       if (dump_file) fprintf(dump_file, "I want the type, do I have it?
%s\n", type ? "true" : "false");
+     }
+  }
+  return 0;
+}

I have added the complete patch below, however the function iphw_execute
encapsulates the logic I am trying at the moment.

The problem is that while this program runs, DECL_ARGUMENTS returns NULL and
therefore the loop is never entered. This is true for functions that have
arguments, such as puts/malloc/... and others in glibc.

As argument (types) conceptually belong to the functions type (not its
decl), you should look at the function decls type, not at DECL_ARGUMENTS.
See the FOREACH_FUNCTION_ARGS iterator and its helpers.  Note that you
need to pass it TREE_TYPE(funcdecl).

(DECL_ARGUMENTS is the list of formal parameters viewed from the function
bodies perspective, so without a body that isn't filled).


Ciao,
Michael.

Reply via email to