On Fri, Feb 27, 2015 at 6:04 AM, Jose Fonseca <jfons...@vmware.com
<mailto:jfons...@vmware.com>> wrote:
On 26/02/15 18:07, Brian Paul wrote:
On 02/26/2015 09:51 AM, Jose Fonseca wrote:
This is to enable the code to build with -Werror=vla in the
short term,
and enable the code to build with MSVC2013 soon after.
---
include/c99_alloca.h | 45
++++++++++++++++++++++++++++++__++++++
src/glsl/nir/nir_from_ssa.c | 19 +++++++--------
src/glsl/nir/nir_live___variables.c | 5 ++--
src/glsl/nir/nir_lower_vars___to_ssa.c | 13 +++++++----
4 files changed, 66 insertions(+), 16 deletions(-)
create mode 100644 include/c99_alloca.h
diff --git a/include/c99_alloca.h b/include/c99_alloca.h
new file mode 100644
index 0000000..6d96d06
--- /dev/null
+++ b/include/c99_alloca.h
@@ -0,0 +1,45 @@
+/****************************__******************************__****************
+ *
+ * Copyright 2015 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a
+ * copy of this software and associated documentation files
(the
+ * "Software"), to deal in the Software without
restriction, including
+ * without limitation the rights to use, copy, modify,
merge, publish,
+ * distribute, sub license, and/or sell copies of the
Software, and to
+ * permit persons to whom the Software is furnished to do
so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice
(including the
+ * next paragraph) shall be included in all copies or
substantial
portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND,
EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF
CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+
******************************__******************************__**************/
+
+#ifndef _C99_ALLOCA_H_
+#define _C99_ALLOCA_H_
+
+
+#if defined(_MSC_VER)
+
+# include <malloc.h>
+
+# define alloca _alloca
+
+#else /* !defined(_MSC_VER) */
+
+# include <alloca.h>
+
+#endif /* !defined(_MSC_VER) */
+
+
+#endif
diff --git a/src/glsl/nir/nir_from_ssa.c
b/src/glsl/nir/nir_from_ssa.c
index c695c95..66339f3 100644
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -26,6 +26,7 @@
*/
#include "nir.h"
+#include "c99_alloca.h"
/*
* This file implements an out-of-SSA pass as described
in "Revisiting
@@ -181,7 +182,7 @@ merge_merge_sets(merge_set *a, merge_set *b)
static bool
merge_sets_interfere(merge_set *a, merge_set *b)
{
- merge_node *dom[a->size + b->size];
+ merge_node **dom = alloca((a->size + b->size) * sizeof
*dom);
int dom_idx = -1;
struct exec_node *an = exec_list_get_head(&a->nodes);
@@ -673,21 +674,21 @@
resolve_parallel_copy(nir___parallel_copy_instr
*pcopy,
}
/* The register/source corresponding to the given index */
- nir_src values[num_copies * 2];
- memset(values, 0, sizeof values);
+ nir_src *values = alloca(num_copies * 2 * sizeof *values);
+ memset(values, 0, num_copies * 2 * sizeof *values);
/* The current location of a given piece of data */
- int loc[num_copies * 2];
+ int *loc = alloca(num_copies * 2 * sizeof *loc);
/* The piece of data that the given piece of data is
to be copied
from */
- int pred[num_copies * 2];
+ int *pred = alloca(num_copies * 2 * sizeof *pred);
These three are all pretty small. < 10 elements in the usual case.
It's going to be a crazy shader if they get above, say, 50 or 100 entries.
/* Initialize loc and pred. We will use -1 for "null" */
- memset(loc, -1, sizeof loc);
- memset(pred, -1, sizeof pred);
+ memset(loc, -1, num_copies * 2 * sizeof *loc);
+ memset(pred, -1, num_copies * 2 * sizeof *pred);
/* The destinations we have yet to properly fill */
- int to_do[num_copies * 2];
+ int *to_do = alloca(num_copies * 2 * sizeof *to_do);
int to_do_idx = -1;
/* Now we set everything up:
@@ -737,7 +738,7 @@
resolve_parallel_copy(nir___parallel_copy_instr *pcopy,
}
/* Currently empty destinations we can go ahead and
fill */
- int ready[num_copies * 2];
+ int *ready = alloca(num_copies * 2 * sizeof *ready);
Also small. See above.
int ready_idx = -1;
/* Mark the ones that are ready for copying. We know
an index is a
diff --git a/src/glsl/nir/nir_live___variables.c
b/src/glsl/nir/nir_live___variables.c
index 7402dc0..b57ca3a 100644
--- a/src/glsl/nir/nir_live___variables.c
+++ b/src/glsl/nir/nir_live___variables.c
@@ -26,6 +26,7 @@
#include "nir.h"
#include "nir_worklist.h"
+#include "c99_alloca.h"
/*
* Basic liveness analysis. This works only in SSA form.
@@ -130,8 +131,8 @@ static bool
propagate_across_edge(nir___block *pred, nir_block *succ,
struct live_variables_state *state)
{
- BITSET_WORD live[state->bitset_words];
- memcpy(live, succ->live_in, sizeof live);
+ BITSET_WORD *live = alloca(state->bitset_words * sizeof
*live);
+ memcpy(live, succ->live_in, state->bitset_words * sizeof
*live);
For this one, we might as well ralloc a single temporary, store it in
live_variables_state, and re-use it. We're not really saving anything
by "reallocating" it each time; especially since we memcpy anyway.
nir_foreach_instr(succ, instr) {
if (instr->type != nir_instr_type_phi)
diff --git a/src/glsl/nir/nir_lower_vars___to_ssa.c
b/src/glsl/nir/nir_lower_vars___to_ssa.c
index 8af7530..f54d1b7 100644
--- a/src/glsl/nir/nir_lower_vars___to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars___to_ssa.c
@@ -27,6 +27,9 @@
#include "nir.h"
+#include "c99_alloca.h"
+
+
struct deref_node {
struct deref_node *parent;
const struct glsl_type *type;
@@ -899,8 +902,8 @@ rename_variables_block(nir___block
*block, struct
lower_variables_state *state)
static void
insert_phi_nodes(struct lower_variables_state *state)
{
- unsigned work[state->impl->num_blocks];
- unsigned has_already[state->impl->num___blocks];
+ unsigned *work = alloca(state->impl->num_blocks * sizeof
*work);
+ unsigned *has_already = alloca(state->impl->num_blocks *
sizeof
*has_already);
/*
* Since the work flags already prevent us from
inserting a node
that has
@@ -910,10 +913,10 @@ insert_phi_nodes(struct
lower_variables_state
*state)
* function. So all we need to handle W is an array
and a pointer
to the
* next element to be inserted and the next element to
be removed.
*/
- nir_block *W[state->impl->num_blocks];
+ nir_block **W = alloca(state->impl->num_blocks * sizeof *W);
- memset(work, 0, sizeof work);
- memset(has_already, 0, sizeof has_already);
+ memset(work, 0, state->impl->num_blocks * sizeof *work);
+ memset(has_already, 0, state->impl->num_blocks * sizeof
*has_already);
num_blocks shouldn't be large, so you can probably go ahead and put this
on the stack. On the other hand, insert_phi_nodes is called once per
run of the optimization loop and we malloc enough other stuff that we
won't notice (from a performance perspective) if you just malloc it.
unsigned w_start, w_end;
unsigned iter_count = 0;
Looks OK to me.
One thing I might have done would be instead of:
unsigned *work = alloca(state->impl->num_blocks * sizeof *work);
...
memset(work, 0, state->impl->num_blocks * sizeof *work);
do
const int work_size = state->impl->num_blocks * sizeof *work;
unsigned *work = alloca(work_size);
> ...
> memset(work, 0, work_size);
It's not a bad idea. The snafu is that `work` must be declared
before word_size so that `sizeof *work` is valid.
That or we must use the actual type of `*work`, but that is more
dangerous as it's very easy to get it wrong, particularly when
dealing with pointers of pointers.
So this would need to look like
unsigned *work;
const size_t work_size = state->impl->num_blocks * sizeof *work;
work = alloca(work_size);
...
memset(work, 0, work_size);
I'll post this in a follow-up review.
...
memset(work, 0, work_size);
AFAIK, there's no zeroing version of alloca().
Yes, I also searched. And unfortunately inline functions can't be
used due to alloca semantics. I'm not sure if there's any
C-preprocessor magic.
Anyway, depending on the maximum size of these alloca arrays, we
might actually need to use malloc, as Windows stack is 1MB by
default, plus the OpenGL driver only gets to use whatever stack is
left from the application.
I doubt you'll have too much trouble there. Most of those should be <
1K even for really big shaders and none of these functions ever call any
of the others. I've made a few comments above about potential specific
non-alloca solutions that might be better for some of them.