[Mesa-dev] [Bug 69135] New: Make EGLUT-Wayland code catch up with latest stable Wayland (1.2)

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69135

  Priority: medium
Bug ID: 69135
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Make EGLUT-Wayland code catch up with latest stable
Wayland (1.2)
  Severity: major
Classification: Unclassified
OS: All
  Reporter: manuel.bachm...@eurogiciel.fr
  Hardware: Other
Status: NEW
   Version: git
 Component: Demos
   Product: Mesa

Created attachment 85492
  --> https://bugs.freedesktop.org/attachment.cgi?id=85492&action=edit
eglut-wayland12.patch

Current EGLUT helper code (used to compile the "es2gears" demo e.g) breaks with
Wayland 1.2.0 and later.

(the Wayland API has been significantly revised between 1.0 and 1.2)

Please consider the attached patch, which allows "libeglut_wayland" to compile
again and examples to run against it.

There is still a problem : demo starts but doesn't refresh (only the first
frame shows). I guess I missed something in a callback. Any insight on this
topic is welcome.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] gallium: add flush_resource context function

2013-09-09 Thread Grigori Goronzy
From: Marek Olšák 

r600g needs explicit flushing before DRI2 buffers are presented on the screen.

v2: add (stub) implementations for all drivers, fix frontbuffer flushing
---
 src/gallium/docs/source/context.rst | 13 +
 src/gallium/drivers/freedreno/freedreno_resource.c  |  6 ++
 src/gallium/drivers/galahad/glhd_context.c  |  7 +++
 src/gallium/drivers/i915/i915_surface.c |  6 ++
 src/gallium/drivers/identity/id_context.c   | 11 +++
 src/gallium/drivers/ilo/ilo_blit.c  |  6 ++
 src/gallium/drivers/llvmpipe/lp_surface.c   |  7 +++
 src/gallium/drivers/noop/noop_pipe.c|  8 
 src/gallium/drivers/nv30/nv30_miptree.c |  6 ++
 src/gallium/drivers/nv30/nv30_resource.c|  1 +
 src/gallium/drivers/nv30/nv30_resource.h|  4 
 src/gallium/drivers/nv50/nv50_surface.c |  7 +++
 src/gallium/drivers/nvc0/nvc0_surface.c |  7 +++
 src/gallium/drivers/r300/r300_blit.c|  6 ++
 src/gallium/drivers/r600/r600_blit.c|  6 ++
 src/gallium/drivers/radeonsi/r600_blit.c|  6 ++
 src/gallium/drivers/rbug/rbug_context.c | 15 +++
 src/gallium/drivers/softpipe/sp_surface.c   |  7 +++
 src/gallium/drivers/svga/svga_pipe_blit.c   |  8 
 src/gallium/drivers/trace/tr_context.c  | 21 +
 src/gallium/include/pipe/p_context.h| 13 +
 .../state_trackers/dri/common/dri_drawable.c|  4 
 src/gallium/state_trackers/dri/drm/dri2.c   | 10 +++---
 23 files changed, 182 insertions(+), 3 deletions(-)

diff --git a/src/gallium/docs/source/context.rst 
b/src/gallium/docs/source/context.rst
index 95f6b22..d5b4d77 100644
--- a/src/gallium/docs/source/context.rst
+++ b/src/gallium/docs/source/context.rst
@@ -423,6 +423,19 @@ Flushing
 ``flush``
 
 
+``flush_resource``
+
+Flush the resource cache, so that the resource can be used
+by an external client. Possible usage:
+- flushing a resource before presenting it on the screen
+- flushing a resource if some other process or device wants to use it
+This shouldn't be used to flush caches if the resource is only managed
+by a single pipe_screen and is not shared with another process.
+(i.e. you shouldn't use it to flush caches explicitly if you want to e.g.
+use the resource for texturing)
+
+
+
 Resource Busy Queries
 ^
 
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
b/src/gallium/drivers/freedreno/freedreno_resource.c
index 3e051ea..3a7e31c 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -337,6 +337,11 @@ render_blit(struct pipe_context *pctx, struct 
pipe_blit_info *info)
return true;
 }
 
+static void
+fd_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+}
+
 void
 fd_resource_screen_init(struct pipe_screen *pscreen)
 {
@@ -357,4 +362,5 @@ fd_resource_context_init(struct pipe_context *pctx)
pctx->surface_destroy = fd_surface_destroy;
pctx->resource_copy_region = fd_resource_copy_region;
pctx->blit = fd_blit;
+   pctx->flush_resource = fd_flush_resource;
 }
diff --git a/src/gallium/drivers/galahad/glhd_context.c 
b/src/gallium/drivers/galahad/glhd_context.c
index ee9de05..4b51b77 100644
--- a/src/gallium/drivers/galahad/glhd_context.c
+++ b/src/gallium/drivers/galahad/glhd_context.c
@@ -783,6 +783,12 @@ galahad_context_blit(struct pipe_context *_pipe,
 }
 
 static void
+galahad_context_flush_resource(struct pipe_context *ctx,
+   struct pipe_resource *resource)
+{
+}
+
+static void
 galahad_context_clear(struct pipe_context *_pipe,
unsigned buffers,
const union pipe_color_union *color,
@@ -1096,6 +1102,7 @@ galahad_context_create(struct pipe_screen *_screen, 
struct pipe_context *pipe)
GLHD_PIPE_INIT(set_stream_output_targets);
GLHD_PIPE_INIT(resource_copy_region);
GLHD_PIPE_INIT(blit);
+   GLHD_PIPE_INIT(flush_resource);
GLHD_PIPE_INIT(clear);
GLHD_PIPE_INIT(clear_render_target);
GLHD_PIPE_INIT(clear_depth_stencil);
diff --git a/src/gallium/drivers/i915/i915_surface.c 
b/src/gallium/drivers/i915/i915_surface.c
index b8eef89..48d4857 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -240,6 +240,11 @@ i915_blit(struct pipe_context *pipe, const struct 
pipe_blit_info *blit_info)
 }
 
 static void
+i915_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+}
+
+static void
 i915_clear_render_target_blitter(struct pipe_context *pipe,
  struct pipe_surface *dst,
  const union pipe_color_union *color,
@@ -359,6 +364,7 @@ i915_init_sur

[Mesa-dev] [PATCH 2/3] r600g: add support for separately allocated CMASKs

2013-09-09 Thread Grigori Goronzy
---
 src/gallium/drivers/r600/evergreen_state.c | 24 +++-
 src/gallium/drivers/r600/r600_hw_context.c | 12 +---
 src/gallium/drivers/r600/r600_resource.h   |  3 +++
 src/gallium/drivers/r600/r600_texture.c| 25 -
 4 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 887f736..469b3a3 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1555,8 +1555,11 @@ void evergreen_init_color_surface(struct r600_context 
*rctx,
surf->export_16bpc = true;
}
 
-   if (rtex->fmask_size && rtex->cmask_size) {
-   color_info |= S_028C70_COMPRESSION(1) | S_028C70_FAST_CLEAR(1);
+   if (rtex->fmask_size) {
+   color_info |= S_028C70_COMPRESSION(1);
+   }
+   if (rtex->cmask_size) {
+   color_info |= S_028C70_FAST_CLEAR(1);
}
 
base_offset = r600_resource_va(rctx->b.b.screen, pipe_tex);
@@ -1574,11 +1577,15 @@ void evergreen_init_color_surface(struct r600_context 
*rctx,
  
S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
}
surf->cb_color_attrib = color_attrib;
-   if (rtex->fmask_size && rtex->cmask_size) {
+   if (rtex->fmask_size) {
surf->cb_color_fmask = (base_offset + rtex->fmask_offset) >> 8;
-   surf->cb_color_cmask = (base_offset + rtex->cmask_offset) >> 8;
} else {
surf->cb_color_fmask = surf->cb_color_base;
+   }
+   if (rtex->cmask_size) {
+   uint64_t va = r600_resource_va(rctx->b.b.screen, 
&rtex->cmask->b.b);
+   surf->cb_color_cmask = (va + rtex->cmask_offset) >> 8;
+   } else {
surf->cb_color_cmask = surf->cb_color_base;
}
surf->cb_color_fmask_slice = 
S_028C88_TILE_MAX(rtex->fmask_slice_tile_max);
@@ -2180,6 +2187,13 @@ static void evergreen_emit_framebuffer_state(struct 
r600_context *rctx, struct r
   &rctx->b.rings.gfx,
   (struct 
r600_resource*)cb->base.texture,
   RADEON_USAGE_READWRITE);
+   unsigned cmask_reloc = 0;
+   if (tex->cmask && tex->cmask != &tex->resource) {
+   cmask_reloc = r600_context_bo_reloc(&rctx->b, 
&rctx->b.rings.gfx,
+   tex->cmask, RADEON_USAGE_READWRITE);
+   } else {
+   cmask_reloc = reloc;
+   }
 
r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 
0x3C, 13);
radeon_emit(cs, cb->cb_color_base); /* 
R_028C60_CB_COLOR0_BASE */
@@ -2208,7 +,7 @@ static void evergreen_emit_framebuffer_state(struct 
r600_context *rctx, struct r
radeon_emit(cs, reloc);
 
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* 
R_028C7C_CB_COLOR0_CMASK */
-   radeon_emit(cs, reloc);
+   radeon_emit(cs, cmask_reloc);
 
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* 
R_028C84_CB_COLOR0_FMASK */
radeon_emit(cs, reloc);
diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index d985af9..c2cd521 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -644,11 +644,11 @@ void r600_flag_resource_cache_flush(struct r600_context 
*rctx,
 
/* Check colorbuffers. */
for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+   struct r600_texture *tex =
+   (struct 
r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+
if (rctx->framebuffer.state.cbufs[i] &&
rctx->framebuffer.state.cbufs[i]->texture == res) {
-   struct r600_texture *tex =
-   (struct 
r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
-
rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
   R600_CONTEXT_FLUSH_AND_INV |
   R600_CONTEXT_WAIT_3D_IDLE;
@@ -658,6 +658,12 @@ void r600_flag_resource_cache_flush(struct r600_context 
*rctx,
}
break;
}
+
+   if (tex && tex->cmask && tex->cmask != &tex->resource && 
&tex->cmask->b.b == res) {
+   rctx->b.flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META |
+  R600_CONTEXT_FLUSH_AND_INV |
+  R600_CONTEXT_WAIT_3D_IDLE;
+   }
}
 
/* Check a depth buffer. */
diff --git a/src/gallium/drivers/r600/r600_resource.h 
b/src/gallium/drive

[Mesa-dev] [PATCH 3/3] r600g: fast color clears for single-sample buffers

2013-09-09 Thread Grigori Goronzy
Allocate a CMASK on demand and use it to fast clear single-sample
colorbuffers. Both FBOs and window system colorbuffers are fast
cleared. Expand as needed when colorbuffers are mapped or displayed
on screen.
---
 src/gallium/drivers/r600/evergreen_state.c   | 11 +
 src/gallium/drivers/r600/r600_blit.c | 74 
 src/gallium/drivers/r600/r600_pipe.c |  4 ++
 src/gallium/drivers/r600/r600_pipe.h |  2 +
 src/gallium/drivers/r600/r600_state_common.c |  2 +-
 src/gallium/drivers/r600/r600_texture.c  |  5 +-
 6 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c 
b/src/gallium/drivers/r600/evergreen_state.c
index 469b3a3..d9447cd 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3567,6 +3567,17 @@ void *evergreen_create_decompress_blend(struct 
r600_context *rctx)
return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
 }
 
+void *evergreen_create_fastclear_blend(struct r600_context *rctx)
+{
+   struct pipe_blend_state blend;
+   unsigned mode = V_028808_CB_ELIMINATE_FAST_CLEAR;
+
+   memset(&blend, 0, sizeof(blend));
+   blend.independent_blend_enable = true;
+   blend.rt[0].colormask = 0xf;
+   return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
+}
+
 void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
 {
struct pipe_depth_stencil_alpha_state dsa = {{0}};
diff --git a/src/gallium/drivers/r600/r600_blit.c 
b/src/gallium/drivers/r600/r600_blit.c
index f87ce4c..fe5de28 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -24,6 +24,7 @@
 #include "util/u_surface.h"
 #include "util/u_blitter.h"
 #include "util/u_format.h"
+#include "evergreend.h"
 
 enum r600_blitter_op /* bitmask */
 {
@@ -311,7 +312,8 @@ static void r600_blit_decompress_color(struct pipe_context 
*ctx,
cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, 
&surf_tmpl);
 
r600_blitter_begin(ctx, R600_DECOMPRESS);
-   util_blitter_custom_color(rctx->blitter, cbsurf, 
rctx->custom_blend_decompress);
+   util_blitter_custom_color(rctx->blitter, cbsurf,
+   rtex->fmask_size ? 
rctx->custom_blend_decompress : rctx->custom_blend_fastclear);
r600_blitter_end(ctx);
 
pipe_surface_reference(&cbsurf, NULL);
@@ -341,7 +343,7 @@ void r600_decompress_color_textures(struct r600_context 
*rctx,
assert(view);
 
tex = (struct r600_texture *)view->texture;
-   assert(tex->cmask_size && tex->fmask_size);
+   assert(tex->cmask_size);
 
r600_blit_decompress_color(&rctx->b.b, tex,
   view->u.tex.first_level, 
view->u.tex.last_level,
@@ -376,7 +378,7 @@ static bool r600_decompress_subresource(struct pipe_context 
*ctx,
   first_layer, last_layer,
   0, u_max_sample(tex));
}
-   } else if (rtex->fmask_size && rtex->cmask_size) {
+   } else if (rtex->cmask_size) {
r600_blit_decompress_color(ctx, rtex, level, level,
   first_layer, last_layer);
}
@@ -432,6 +434,27 @@ static void evergreen_set_clear_color(struct pipe_surface 
*cbuf,
memcpy(clear_value, &uc, 2 * sizeof(uint32_t));
 }
 
+static void evergreen_check_alloc_cmask(struct pipe_context *ctx,
+struct pipe_surface *cbuf)
+{
+struct r600_context *rctx = (struct r600_context *)ctx;
+struct r600_texture *tex = (struct r600_texture *)cbuf->texture;
+struct r600_surface *surf = (struct r600_surface *)cbuf;
+
+if (tex->cmask)
+return;
+
+r600_texture_init_cmask(rctx->screen, tex);
+
+/* update colorbuffer state bits */
+if (tex->cmask != NULL) {
+uint64_t va = r600_resource_va(rctx->b.b.screen, 
&tex->cmask->b.b);
+surf->cb_color_cmask = va >> 8;
+surf->cb_color_cmask_slice = 
S_028C80_TILE_MAX(tex->cmask_slice_tile_max);
+surf->cb_color_info |= S_028C70_FAST_CLEAR(1);
+}
+}
+
 static bool can_fast_clear_color(struct pipe_context *ctx)
 {
struct r600_context *rctx = (struct r600_context *)ctx;
@@ -445,10 +468,6 @@ static bool can_fast_clear_color(struct pipe_context *ctx)
for (i = 0; i < fb->nr_cbufs; i++) {
struct r600_texture *tex = (struct r600_texture 
*)fb->cbufs[i]->texture;
 
-   if (tex->cmask_size == 0) {
-   return false;
-   }
-
/* 128-bit formats are unuspported */
if (util_for

[Mesa-dev] [PATCH] st/dri: do not create a new context for msaa copy

2013-09-09 Thread Maarten Lankhorst
Commit b77316ad7594f
st/dri: always copy new DRI front and back buffers to corresponding MSAA 
buffers

introduced creating a pipe_context for every call to validate, which is not 
required
because the callers have a context anyway.

Only exception is egl_g3d_create_pbuffer_from_client_buffer, can someone test 
if it
still works with NULL passed as context for validate? From examining the code I
believe it does, but I didn't thoroughly test it.

Signed-off-by: Maarten Lankhorst 
Cc: 9.2 
---
diff --git a/src/gallium/include/state_tracker/st_api.h 
b/src/gallium/include/state_tracker/st_api.h
index 3ecd12e..9dcb76f 100644
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -342,7 +342,8 @@ struct st_framebuffer_iface
 * the last call might be destroyed.  This behavior might change in the
 * future.
 */
-   boolean (*validate)(struct st_framebuffer_iface *stfbi,
+   boolean (*validate)(struct st_context_iface *stctx,
+   struct st_framebuffer_iface *stfbi,
const enum st_attachment_type *statts,
unsigned count,
struct pipe_resource **out);
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c 
b/src/gallium/state_trackers/dri/common/dri_drawable.c
index 18d8d89..ddf9400 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
@@ -42,11 +42,13 @@ static void
 swap_fences_unref(struct dri_drawable *draw);
 
 static boolean
-dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi,
+dri_st_framebuffer_validate(struct st_context_iface *stctx,
+struct st_framebuffer_iface *stfbi,
 const enum st_attachment_type *statts,
 unsigned count,
 struct pipe_resource **out)
 {
+   struct dri_context *ctx = (struct dri_context *)stctx->st_manager_private;
struct dri_drawable *drawable =
   (struct dri_drawable *) stfbi->st_manager_private;
struct dri_screen *screen = dri_screen(drawable->sPriv);
@@ -78,7 +80,7 @@ dri_st_framebuffer_validate(struct st_framebuffer_iface 
*stfbi,
  if (new_stamp && drawable->update_drawable_info)
 drawable->update_drawable_info(drawable);
 
- drawable->allocate_textures(drawable, statts, count);
+ drawable->allocate_textures(ctx, drawable, statts, count);
 
  /* add existing textures */
  for (i = 0; i < ST_ATTACHMENT_COUNT; i++) {
@@ -183,7 +185,8 @@ dri_destroy_buffer(__DRIdrawable * dPriv)
  * exist.  Used by the TFP extension.
  */
 static void
-dri_drawable_validate_att(struct dri_drawable *drawable,
+dri_drawable_validate_att(struct dri_context *ctx,
+  struct dri_drawable *drawable,
   enum st_attachment_type statt)
 {
enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
@@ -203,7 +206,7 @@ dri_drawable_validate_att(struct dri_drawable *drawable,
 
drawable->texture_stamp = drawable->dPriv->lastStamp - 1;
 
-   drawable->base.validate(&drawable->base, statts, count, NULL);
+   drawable->base.validate(ctx->st, &drawable->base, statts, count, NULL);
 }
 
 /**
@@ -217,7 +220,7 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
struct dri_drawable *drawable = dri_drawable(dPriv);
struct pipe_resource *pt;
 
-   dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT);
+   dri_drawable_validate_att(ctx, drawable, ST_ATTACHMENT_FRONT_LEFT);
 
/* Use the pipe resource associated with the X drawable */
pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT];
diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h 
b/src/gallium/state_trackers/dri/common/dri_drawable.h
index 50e5cc4..c514218 100644
--- a/src/gallium/state_trackers/dri/common/dri_drawable.h
+++ b/src/gallium/state_trackers/dri/common/dri_drawable.h
@@ -71,7 +71,8 @@ struct dri_drawable
struct pipe_surface *drisw_surface;
 
/* hooks filled in by dri2 & drisw */
-   void (*allocate_textures)(struct dri_drawable *drawable,
+   void (*allocate_textures)(struct dri_context *ctx,
+ struct dri_drawable *drawable,
  const enum st_attachment_type *statts,
  unsigned count);
 
diff --git a/src/gallium/state_trackers/dri/drm/dri2.c 
b/src/gallium/state_trackers/dri/drm/dri2.c
index 1dcc1f7..036fe32 100644
--- a/src/gallium/state_trackers/dri/drm/dri2.c
+++ b/src/gallium/state_trackers/dri/drm/dri2.c
@@ -169,7 +169,8 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
  * Process __DRIbuffer and convert them into pipe_resources.
  */
 static void
-dri2_drawable_process_buffers(struct dri_drawable *drawable,
+dri2_drawable_process_buffers(struct dri_context *ctx,
+  struct dri_drawable *drawable,
  

Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_CAP_MIXED_FRAMEBUFFER_SIZES

2013-09-09 Thread Roland Scheidegger
I'm not really convinced of this idea.
There's already PIPE_CAP_MIXED_COLORBUFFER_FORMATS which is sort of
similar - a "requirement" of ARB_fbo, but it isn't used to determine
support of ARB_fbo or not (my guess is drivers want to advertize ARB_fbo
even if they can't do it, and ARB_fbo doesn't really have a hard
requirement for anything as you always can say "no" to fb supported).
So I can't see why not supporting different width/height is treated
different to not supporting different formats.
(For that matter I'm not sure if a new cap bit is needed not supporting
different formats might affect exactly the same drivers as not
supporting different width/height though I'm not sure of that. In any
case though I guess the way the fb validation is done, that is without
any direct driver feedback, it will remain problematic for drivers which
can't support everything always.)

Roland


Am 08.09.2013 05:00, schrieb Ilia Mirkin:
> This CAP will determine whether ARB_framebuffer_object can be enabled.
> The nv30 driver does not allow mixing swizzled and linear zsbuf/cbufs.
> 
> Signed-off-by: Ilia Mirkin 
> ---
> 
> Christoph, this is what I understood you suggested I do. Hopefully I got it
> right. I've split this up into two patches, one that adds the cap, one that
> actually makes use of it, feel free to squash them together if you prefer.
> 
> My explanation of the cap in screen.rst is not great, largely due to my lack
> of understanding of the major issue, i.e. precisely what can't be mixed. Feel
> free to reword, or let me know a better wording and I can resend the patch.
> 
> By fiddling with the cap being on and off, I was able to test that it
> correctly affects ARB_framebuffer_object being exposed/hidden (with the second
> patch in place).
> 
>  src/gallium/docs/source/screen.rst   | 4 
>  src/gallium/drivers/i915/i915_screen.c   | 1 +
>  src/gallium/drivers/ilo/ilo_screen.c | 1 +
>  src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>  src/gallium/drivers/nv30/nv30_screen.c   | 1 +
>  src/gallium/drivers/nv50/nv50_screen.c   | 1 +
>  src/gallium/drivers/nvc0/nvc0_screen.c   | 1 +
>  src/gallium/drivers/r300/r300_screen.c   | 1 +
>  src/gallium/drivers/r600/r600_pipe.c | 1 +
>  src/gallium/drivers/radeonsi/radeonsi_pipe.c | 1 +
>  src/gallium/drivers/softpipe/sp_screen.c | 1 +
>  src/gallium/drivers/svga/svga_screen.c   | 1 +
>  src/gallium/include/pipe/p_defines.h | 3 ++-
>  13 files changed, 17 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/docs/source/screen.rst 
> b/src/gallium/docs/source/screen.rst
> index d19cd1a..238cc62 100644
> --- a/src/gallium/docs/source/screen.rst
> +++ b/src/gallium/docs/source/screen.rst
> @@ -173,6 +173,10 @@ The integer capabilities:
>viewport/scissor combination.  
>  * ''PIPE_CAP_ENDIANNESS``:: The endianness of the device.  Either
>PIPE_ENDIAN_BIG or PIPE_ENDIAN_LITTLE.
> +* ``PIPE_CAP_MIXED_FRAMEBUFFER_SIZES``: Whether it is possible to mix
> +  framebuffer sizes. This controls whether ARB_framebuffer_object is
> +  provided. For example NV30/NV40 is unable to deal with swizzled and linear
> +  formats at the same time.
>  
>  
>  .. _pipe_capf:
> diff --git a/src/gallium/drivers/i915/i915_screen.c 
> b/src/gallium/drivers/i915/i915_screen.c
> index 556dda8..77607d0 100644
> --- a/src/gallium/drivers/i915/i915_screen.c
> +++ b/src/gallium/drivers/i915/i915_screen.c
> @@ -172,6 +172,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap 
> cap)
> /* Supported features (boolean caps). */
> case PIPE_CAP_ANISOTROPIC_FILTER:
> case PIPE_CAP_NPOT_TEXTURES:
> +   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
> case PIPE_CAP_POINT_SPRITE:
> case PIPE_CAP_PRIMITIVE_RESTART: /* draw module */
> case PIPE_CAP_TEXTURE_SHADOW_MAP:
> diff --git a/src/gallium/drivers/ilo/ilo_screen.c 
> b/src/gallium/drivers/ilo/ilo_screen.c
> index 3f8d431..ddf11ff 100644
> --- a/src/gallium/drivers/ilo/ilo_screen.c
> +++ b/src/gallium/drivers/ilo/ilo_screen.c
> @@ -286,6 +286,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap 
> param)
>  
> switch (param) {
> case PIPE_CAP_NPOT_TEXTURES:
> +   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
> case PIPE_CAP_TWO_SIDED_STENCIL:
>return true;
> case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c 
> b/src/gallium/drivers/llvmpipe/lp_screen.c
> index b3cd77f..2bbc2c9 100644
> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
> @@ -109,6 +109,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum 
> pipe_cap param)
> case PIPE_CAP_MAX_COMBINED_SAMPLERS:
>return 2 * PIPE_MAX_SAMPLERS;  /* VS + FS samplers */
> case PIPE_CAP_NPOT_TEXTURES:
> +   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
>return 1;
> case PIPE_CAP_TWO_SIDED_STENCIL:
>return 1;
> diff --git a/src/gallium/drivers/nv30/nv30_screen.c

Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_CAP_MIXED_FRAMEBUFFER_SIZES

2013-09-09 Thread Erik Faye-Lund
On Mon, Sep 9, 2013 at 1:31 PM, Roland Scheidegger  wrote:
> I'm not really convinced of this idea.
> There's already PIPE_CAP_MIXED_COLORBUFFER_FORMATS which is sort of
> similar - a "requirement" of ARB_fbo, but it isn't used to determine
> support of ARB_fbo or not (my guess is drivers want to advertize ARB_fbo
> even if they can't do it, and ARB_fbo doesn't really have a hard
> requirement for anything as you always can say "no" to fb supported).
> So I can't see why not supporting different width/height is treated
> different to not supporting different formats.

Actually, ARB_framebuffer_object does have a hard requirement on
rendering to differently sized attachments. FRAMEBUFFER_UNSUPPORTED is
only allowed for format-issues. EXT_framebuffer_objects can still be
supported on hardware that cannot meet this requirement.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/dri: do not create a new context for msaa copy

2013-09-09 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Mon, Sep 9, 2013 at 1:02 PM, Maarten Lankhorst
 wrote:
> Commit b77316ad7594f
> st/dri: always copy new DRI front and back buffers to corresponding MSAA 
> buffers
>
> introduced creating a pipe_context for every call to validate, which is not 
> required
> because the callers have a context anyway.
>
> Only exception is egl_g3d_create_pbuffer_from_client_buffer, can someone test 
> if it
> still works with NULL passed as context for validate? From examining the code 
> I
> believe it does, but I didn't thoroughly test it.
>
> Signed-off-by: Maarten Lankhorst 
> Cc: 9.2 
> ---
> diff --git a/src/gallium/include/state_tracker/st_api.h 
> b/src/gallium/include/state_tracker/st_api.h
> index 3ecd12e..9dcb76f 100644
> --- a/src/gallium/include/state_tracker/st_api.h
> +++ b/src/gallium/include/state_tracker/st_api.h
> @@ -342,7 +342,8 @@ struct st_framebuffer_iface
>  * the last call might be destroyed.  This behavior might change in the
>  * future.
>  */
> -   boolean (*validate)(struct st_framebuffer_iface *stfbi,
> +   boolean (*validate)(struct st_context_iface *stctx,
> +   struct st_framebuffer_iface *stfbi,
> const enum st_attachment_type *statts,
> unsigned count,
> struct pipe_resource **out);
> diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.c 
> b/src/gallium/state_trackers/dri/common/dri_drawable.c
> index 18d8d89..ddf9400 100644
> --- a/src/gallium/state_trackers/dri/common/dri_drawable.c
> +++ b/src/gallium/state_trackers/dri/common/dri_drawable.c
> @@ -42,11 +42,13 @@ static void
>  swap_fences_unref(struct dri_drawable *draw);
>
>  static boolean
> -dri_st_framebuffer_validate(struct st_framebuffer_iface *stfbi,
> +dri_st_framebuffer_validate(struct st_context_iface *stctx,
> +struct st_framebuffer_iface *stfbi,
>  const enum st_attachment_type *statts,
>  unsigned count,
>  struct pipe_resource **out)
>  {
> +   struct dri_context *ctx = (struct dri_context *)stctx->st_manager_private;
> struct dri_drawable *drawable =
>(struct dri_drawable *) stfbi->st_manager_private;
> struct dri_screen *screen = dri_screen(drawable->sPriv);
> @@ -78,7 +80,7 @@ dri_st_framebuffer_validate(struct st_framebuffer_iface 
> *stfbi,
>   if (new_stamp && drawable->update_drawable_info)
>  drawable->update_drawable_info(drawable);
>
> - drawable->allocate_textures(drawable, statts, count);
> + drawable->allocate_textures(ctx, drawable, statts, count);
>
>   /* add existing textures */
>   for (i = 0; i < ST_ATTACHMENT_COUNT; i++) {
> @@ -183,7 +185,8 @@ dri_destroy_buffer(__DRIdrawable * dPriv)
>   * exist.  Used by the TFP extension.
>   */
>  static void
> -dri_drawable_validate_att(struct dri_drawable *drawable,
> +dri_drawable_validate_att(struct dri_context *ctx,
> +  struct dri_drawable *drawable,
>enum st_attachment_type statt)
>  {
> enum st_attachment_type statts[ST_ATTACHMENT_COUNT];
> @@ -203,7 +206,7 @@ dri_drawable_validate_att(struct dri_drawable *drawable,
>
> drawable->texture_stamp = drawable->dPriv->lastStamp - 1;
>
> -   drawable->base.validate(&drawable->base, statts, count, NULL);
> +   drawable->base.validate(ctx->st, &drawable->base, statts, count, NULL);
>  }
>
>  /**
> @@ -217,7 +220,7 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
> struct dri_drawable *drawable = dri_drawable(dPriv);
> struct pipe_resource *pt;
>
> -   dri_drawable_validate_att(drawable, ST_ATTACHMENT_FRONT_LEFT);
> +   dri_drawable_validate_att(ctx, drawable, ST_ATTACHMENT_FRONT_LEFT);
>
> /* Use the pipe resource associated with the X drawable */
> pt = drawable->textures[ST_ATTACHMENT_FRONT_LEFT];
> diff --git a/src/gallium/state_trackers/dri/common/dri_drawable.h 
> b/src/gallium/state_trackers/dri/common/dri_drawable.h
> index 50e5cc4..c514218 100644
> --- a/src/gallium/state_trackers/dri/common/dri_drawable.h
> +++ b/src/gallium/state_trackers/dri/common/dri_drawable.h
> @@ -71,7 +71,8 @@ struct dri_drawable
> struct pipe_surface *drisw_surface;
>
> /* hooks filled in by dri2 & drisw */
> -   void (*allocate_textures)(struct dri_drawable *drawable,
> +   void (*allocate_textures)(struct dri_context *ctx,
> + struct dri_drawable *drawable,
>   const enum st_attachment_type *statts,
>   unsigned count);
>
> diff --git a/src/gallium/state_trackers/dri/drm/dri2.c 
> b/src/gallium/state_trackers/dri/drm/dri2.c
> index 1dcc1f7..036fe32 100644
> --- a/src/gallium/state_trackers/dri/drm/dri2.c
> +++ b/src/gallium/state_trackers/dri/drm/dri2.c
> @@ -169,7 +169,8 @@ dri2_drawable_get_buffers(struct dri_drawable *

Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_CAP_MIXED_FRAMEBUFFER_SIZES

2013-09-09 Thread Roland Scheidegger
Am 09.09.2013 13:55, schrieb Erik Faye-Lund:
> On Mon, Sep 9, 2013 at 1:31 PM, Roland Scheidegger  wrote:
>> I'm not really convinced of this idea.
>> There's already PIPE_CAP_MIXED_COLORBUFFER_FORMATS which is sort of
>> similar - a "requirement" of ARB_fbo, but it isn't used to determine
>> support of ARB_fbo or not (my guess is drivers want to advertize ARB_fbo
>> even if they can't do it, and ARB_fbo doesn't really have a hard
>> requirement for anything as you always can say "no" to fb supported).
>> So I can't see why not supporting different width/height is treated
>> different to not supporting different formats.
> 
> Actually, ARB_framebuffer_object does have a hard requirement on
> rendering to differently sized attachments. FRAMEBUFFER_UNSUPPORTED is
> only allowed for format-issues. EXT_framebuffer_objects can still be
> supported on hardware that cannot meet this requirement
> 

Ah you're right. For some reason I thought it would be permitted for a
driver to return unsupported framebuffer for any reason.
So this makes sense then.
(Though I'm wondering if we really need the
PIPE_CAP_MIXED_COLORBUFFER_FORMATS then since I don't think there's any
driver which can do different width/height hence support ARB_fbo but not
actually support mixed formats, r300 and softpipe might be candidates
though I don't know why the latter wouldn't support mixed formats as it
claims not to and have no idea if the former can support different
width/height.)

Roland
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_CAP_MIXED_FRAMEBUFFER_SIZES

2013-09-09 Thread Marek Olšák
On r300, colorbuffers can have different pitches, therefore each of
them can be of arbitrary size. Other than that, there is a separate
framebuffer width and height state, which applies to all colorbuffers
(it's usually set to the minimum of all widths and heights as in
OpenGL).

r300 doesn't support mixed formats (r500 does), but it does support
mixed sizes and various other features like MSAA.

PIPE_CAP_MIXED_COLORBUFFER_FORMATS doesn't affect exposed extensions,
but it affects FBO completeness.

Marek

On Mon, Sep 9, 2013 at 2:36 PM, Roland Scheidegger  wrote:
> Am 09.09.2013 13:55, schrieb Erik Faye-Lund:
>> On Mon, Sep 9, 2013 at 1:31 PM, Roland Scheidegger  
>> wrote:
>>> I'm not really convinced of this idea.
>>> There's already PIPE_CAP_MIXED_COLORBUFFER_FORMATS which is sort of
>>> similar - a "requirement" of ARB_fbo, but it isn't used to determine
>>> support of ARB_fbo or not (my guess is drivers want to advertize ARB_fbo
>>> even if they can't do it, and ARB_fbo doesn't really have a hard
>>> requirement for anything as you always can say "no" to fb supported).
>>> So I can't see why not supporting different width/height is treated
>>> different to not supporting different formats.
>>
>> Actually, ARB_framebuffer_object does have a hard requirement on
>> rendering to differently sized attachments. FRAMEBUFFER_UNSUPPORTED is
>> only allowed for format-issues. EXT_framebuffer_objects can still be
>> supported on hardware that cannot meet this requirement
>>
>
> Ah you're right. For some reason I thought it would be permitted for a
> driver to return unsupported framebuffer for any reason.
> So this makes sense then.
> (Though I'm wondering if we really need the
> PIPE_CAP_MIXED_COLORBUFFER_FORMATS then since I don't think there's any
> driver which can do different width/height hence support ARB_fbo but not
> actually support mixed formats, r300 and softpipe might be candidates
> though I don't know why the latter wouldn't support mixed formats as it
> claims not to and have no idea if the former can support different
> width/height.)
>
> Roland
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] r600g: add support for separately allocated CMASKs

2013-09-09 Thread Marek Olšák
On Mon, Sep 9, 2013 at 11:52 AM, Grigori Goronzy  wrote:
> ---
>  src/gallium/drivers/r600/evergreen_state.c | 24 +++-
>  src/gallium/drivers/r600/r600_hw_context.c | 12 +---
>  src/gallium/drivers/r600/r600_resource.h   |  3 +++
>  src/gallium/drivers/r600/r600_texture.c| 25 -
>  4 files changed, 55 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 887f736..469b3a3 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1555,8 +1555,11 @@ void evergreen_init_color_surface(struct r600_context 
> *rctx,
> surf->export_16bpc = true;
> }
>
> -   if (rtex->fmask_size && rtex->cmask_size) {
> -   color_info |= S_028C70_COMPRESSION(1) | 
> S_028C70_FAST_CLEAR(1);
> +   if (rtex->fmask_size) {
> +   color_info |= S_028C70_COMPRESSION(1);
> +   }
> +   if (rtex->cmask_size) {
> +   color_info |= S_028C70_FAST_CLEAR(1);
> }
>
> base_offset = r600_resource_va(rctx->b.b.screen, pipe_tex);
> @@ -1574,11 +1577,15 @@ void evergreen_init_color_surface(struct r600_context 
> *rctx,
>   
> S_028C6C_SLICE_MAX(surf->base.u.tex.last_layer);
> }
> surf->cb_color_attrib = color_attrib;
> -   if (rtex->fmask_size && rtex->cmask_size) {
> +   if (rtex->fmask_size) {
> surf->cb_color_fmask = (base_offset + rtex->fmask_offset) >> 
> 8;
> -   surf->cb_color_cmask = (base_offset + rtex->cmask_offset) >> 
> 8;
> } else {
> surf->cb_color_fmask = surf->cb_color_base;
> +   }
> +   if (rtex->cmask_size) {
> +   uint64_t va = r600_resource_va(rctx->b.b.screen, 
> &rtex->cmask->b.b);
> +   surf->cb_color_cmask = (va + rtex->cmask_offset) >> 8;
> +   } else {
> surf->cb_color_cmask = surf->cb_color_base;
> }
> surf->cb_color_fmask_slice = 
> S_028C88_TILE_MAX(rtex->fmask_slice_tile_max);
> @@ -2180,6 +2187,13 @@ static void evergreen_emit_framebuffer_state(struct 
> r600_context *rctx, struct r
>&rctx->b.rings.gfx,
>(struct 
> r600_resource*)cb->base.texture,
>
> RADEON_USAGE_READWRITE);
> +   unsigned cmask_reloc = 0;
> +   if (tex->cmask && tex->cmask != &tex->resource) {
> +   cmask_reloc = r600_context_bo_reloc(&rctx->b, 
> &rctx->b.rings.gfx,
> +   tex->cmask, RADEON_USAGE_READWRITE);
> +   } else {
> +   cmask_reloc = reloc;
> +   }
>
> r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 
> 0x3C, 13);
> radeon_emit(cs, cb->cb_color_base); /* 
> R_028C60_CB_COLOR0_BASE */
> @@ -2208,7 +,7 @@ static void evergreen_emit_framebuffer_state(struct 
> r600_context *rctx, struct r
> radeon_emit(cs, reloc);
>
> radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* 
> R_028C7C_CB_COLOR0_CMASK */
> -   radeon_emit(cs, reloc);
> +   radeon_emit(cs, cmask_reloc);
>
> radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* 
> R_028C84_CB_COLOR0_FMASK */
> radeon_emit(cs, reloc);
> diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
> b/src/gallium/drivers/r600/r600_hw_context.c
> index d985af9..c2cd521 100644
> --- a/src/gallium/drivers/r600/r600_hw_context.c
> +++ b/src/gallium/drivers/r600/r600_hw_context.c
> @@ -644,11 +644,11 @@ void r600_flag_resource_cache_flush(struct r600_context 
> *rctx,
>
> /* Check colorbuffers. */
> for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
> +   struct r600_texture *tex =
> +   (struct 
> r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
> +

Please check if cbufs[i] != NULL.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: add flush_resource context function

2013-09-09 Thread Marek Olšák
On Mon, Sep 9, 2013 at 11:52 AM, Grigori Goronzy  wrote:
> From: Marek Olšák 
>
> r600g needs explicit flushing before DRI2 buffers are presented on the screen.
>
> v2: add (stub) implementations for all drivers, fix frontbuffer flushing
> ---
>  src/gallium/docs/source/context.rst | 13 +
>  src/gallium/drivers/freedreno/freedreno_resource.c  |  6 ++
>  src/gallium/drivers/galahad/glhd_context.c  |  7 +++
>  src/gallium/drivers/i915/i915_surface.c |  6 ++
>  src/gallium/drivers/identity/id_context.c   | 11 +++
>  src/gallium/drivers/ilo/ilo_blit.c  |  6 ++
>  src/gallium/drivers/llvmpipe/lp_surface.c   |  7 +++
>  src/gallium/drivers/noop/noop_pipe.c|  8 
>  src/gallium/drivers/nv30/nv30_miptree.c |  6 ++
>  src/gallium/drivers/nv30/nv30_resource.c|  1 +
>  src/gallium/drivers/nv30/nv30_resource.h|  4 
>  src/gallium/drivers/nv50/nv50_surface.c |  7 +++
>  src/gallium/drivers/nvc0/nvc0_surface.c |  7 +++
>  src/gallium/drivers/r300/r300_blit.c|  6 ++
>  src/gallium/drivers/r600/r600_blit.c|  6 ++
>  src/gallium/drivers/radeonsi/r600_blit.c|  6 ++
>  src/gallium/drivers/rbug/rbug_context.c | 15 +++
>  src/gallium/drivers/softpipe/sp_surface.c   |  7 +++
>  src/gallium/drivers/svga/svga_pipe_blit.c   |  8 
>  src/gallium/drivers/trace/tr_context.c  | 21 
> +
>  src/gallium/include/pipe/p_context.h| 13 +
>  .../state_trackers/dri/common/dri_drawable.c|  4 
>  src/gallium/state_trackers/dri/drm/dri2.c   | 10 +++---
>  23 files changed, 182 insertions(+), 3 deletions(-)
>
> diff --git a/src/gallium/docs/source/context.rst 
> b/src/gallium/docs/source/context.rst
> index 95f6b22..d5b4d77 100644
> --- a/src/gallium/docs/source/context.rst
> +++ b/src/gallium/docs/source/context.rst
> @@ -423,6 +423,19 @@ Flushing
>  ``flush``
>
>
> +``flush_resource``
> +
> +Flush the resource cache, so that the resource can be used
> +by an external client. Possible usage:
> +- flushing a resource before presenting it on the screen
> +- flushing a resource if some other process or device wants to use it
> +This shouldn't be used to flush caches if the resource is only managed
> +by a single pipe_screen and is not shared with another process.
> +(i.e. you shouldn't use it to flush caches explicitly if you want to e.g.
> +use the resource for texturing)
> +
> +
> +
>  Resource Busy Queries
>  ^
>
> diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c 
> b/src/gallium/drivers/freedreno/freedreno_resource.c
> index 3e051ea..3a7e31c 100644
> --- a/src/gallium/drivers/freedreno/freedreno_resource.c
> +++ b/src/gallium/drivers/freedreno/freedreno_resource.c
> @@ -337,6 +337,11 @@ render_blit(struct pipe_context *pctx, struct 
> pipe_blit_info *info)
> return true;
>  }
>
> +static void
> +fd_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
> +{
> +}
> +
>  void
>  fd_resource_screen_init(struct pipe_screen *pscreen)
>  {
> @@ -357,4 +362,5 @@ fd_resource_context_init(struct pipe_context *pctx)
> pctx->surface_destroy = fd_surface_destroy;
> pctx->resource_copy_region = fd_resource_copy_region;
> pctx->blit = fd_blit;
> +   pctx->flush_resource = fd_flush_resource;
>  }
> diff --git a/src/gallium/drivers/galahad/glhd_context.c 
> b/src/gallium/drivers/galahad/glhd_context.c
> index ee9de05..4b51b77 100644
> --- a/src/gallium/drivers/galahad/glhd_context.c
> +++ b/src/gallium/drivers/galahad/glhd_context.c
> @@ -783,6 +783,12 @@ galahad_context_blit(struct pipe_context *_pipe,
>  }
>
>  static void
> +galahad_context_flush_resource(struct pipe_context *ctx,
> +   struct pipe_resource *resource)
> +{
> +}

galahad is like identity, it should redirect calls to the driver under it.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] R600/SI: Merge offset0 and offset1 fields for single address DS instructions v2

2013-09-09 Thread Michel Dänzer
On Fre, 2013-09-06 at 11:06 -0400, Tom Stellard wrote:
> From: Tom Stellard 
> 
> Also remove unused data fields from the DS_Load_Helper class.
> 
> v2:
>   - Merge fields for DS_WRITE

[...]

> -class DS_Store_Helper  op, string asm, RegisterClass regClass> : DS <
> +class DS_Store_Helper  op, string asm, RegisterClass regClass> : 
> DS_1A <
>op,
>(outs),
> -  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, VReg_32:$data1,
> -   i8imm:$offset0, i8imm:$offset1),
> -  asm#" $gds, $addr, $data0, $data1, $offset0, $offset1, [M0]",
> +  (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i16imm:$offset),
> +  asm#" $gds, $addr, $data0, $offset [M0]",
>[]> {
>let mayStore = 1;
>let mayLoad = 0;
>let vdst = 0;
>  }

The register class parameter can be removed from this class as well.
Either way,

Reviewed-by: Michel Dänzer 


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast |  Debian, X and DRI developer

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] r600g: fast color clears for single-sample buffers

2013-09-09 Thread Marek Olšák
On Mon, Sep 9, 2013 at 11:52 AM, Grigori Goronzy  wrote:
> Allocate a CMASK on demand and use it to fast clear single-sample
> colorbuffers. Both FBOs and window system colorbuffers are fast
> cleared. Expand as needed when colorbuffers are mapped or displayed
> on screen.
> ---
>  src/gallium/drivers/r600/evergreen_state.c   | 11 +
>  src/gallium/drivers/r600/r600_blit.c | 74 
> 
>  src/gallium/drivers/r600/r600_pipe.c |  4 ++
>  src/gallium/drivers/r600/r600_pipe.h |  2 +
>  src/gallium/drivers/r600/r600_state_common.c |  2 +-
>  src/gallium/drivers/r600/r600_texture.c  |  5 +-
>  6 files changed, 86 insertions(+), 12 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/evergreen_state.c 
> b/src/gallium/drivers/r600/evergreen_state.c
> index 469b3a3..d9447cd 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -3567,6 +3567,17 @@ void *evergreen_create_decompress_blend(struct 
> r600_context *rctx)
> return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
>  }
>
> +void *evergreen_create_fastclear_blend(struct r600_context *rctx)
> +{
> +   struct pipe_blend_state blend;
> +   unsigned mode = V_028808_CB_ELIMINATE_FAST_CLEAR;
> +
> +   memset(&blend, 0, sizeof(blend));
> +   blend.independent_blend_enable = true;
> +   blend.rt[0].colormask = 0xf;
> +   return evergreen_create_blend_state_mode(&rctx->b.b, &blend, mode);
> +}
> +
>  void *evergreen_create_db_flush_dsa(struct r600_context *rctx)
>  {
> struct pipe_depth_stencil_alpha_state dsa = {{0}};
> diff --git a/src/gallium/drivers/r600/r600_blit.c 
> b/src/gallium/drivers/r600/r600_blit.c
> index f87ce4c..fe5de28 100644
> --- a/src/gallium/drivers/r600/r600_blit.c
> +++ b/src/gallium/drivers/r600/r600_blit.c
> @@ -24,6 +24,7 @@
>  #include "util/u_surface.h"
>  #include "util/u_blitter.h"
>  #include "util/u_format.h"
> +#include "evergreend.h"
>
>  enum r600_blitter_op /* bitmask */
>  {
> @@ -311,7 +312,8 @@ static void r600_blit_decompress_color(struct 
> pipe_context *ctx,
> cbsurf = ctx->create_surface(ctx, 
> &rtex->resource.b.b, &surf_tmpl);
>
> r600_blitter_begin(ctx, R600_DECOMPRESS);
> -   util_blitter_custom_color(rctx->blitter, cbsurf, 
> rctx->custom_blend_decompress);
> +   util_blitter_custom_color(rctx->blitter, cbsurf,
> +   rtex->fmask_size ? 
> rctx->custom_blend_decompress : rctx->custom_blend_fastclear);
> r600_blitter_end(ctx);
>
> pipe_surface_reference(&cbsurf, NULL);
> @@ -341,7 +343,7 @@ void r600_decompress_color_textures(struct r600_context 
> *rctx,
> assert(view);
>
> tex = (struct r600_texture *)view->texture;
> -   assert(tex->cmask_size && tex->fmask_size);
> +   assert(tex->cmask_size);
>
> r600_blit_decompress_color(&rctx->b.b, tex,
>view->u.tex.first_level, 
> view->u.tex.last_level,
> @@ -376,7 +378,7 @@ static bool r600_decompress_subresource(struct 
> pipe_context *ctx,
>first_layer, last_layer,
>0, u_max_sample(tex));
> }
> -   } else if (rtex->fmask_size && rtex->cmask_size) {
> +   } else if (rtex->cmask_size) {
> r600_blit_decompress_color(ctx, rtex, level, level,
>first_layer, last_layer);
> }
> @@ -432,6 +434,27 @@ static void evergreen_set_clear_color(struct 
> pipe_surface *cbuf,
> memcpy(clear_value, &uc, 2 * sizeof(uint32_t));
>  }
>
> +static void evergreen_check_alloc_cmask(struct pipe_context *ctx,
> +struct pipe_surface *cbuf)
> +{
> +struct r600_context *rctx = (struct r600_context *)ctx;
> +struct r600_texture *tex = (struct r600_texture *)cbuf->texture;
> +struct r600_surface *surf = (struct r600_surface *)cbuf;
> +
> +if (tex->cmask)
> +return;
> +
> +r600_texture_init_cmask(rctx->screen, tex);
> +
> +/* update colorbuffer state bits */
> +if (tex->cmask != NULL) {
> +uint64_t va = r600_resource_va(rctx->b.b.screen, 
> &tex->cmask->b.b);
> +surf->cb_color_cmask = va >> 8;
> +surf->cb_color_cmask_slice = 
> S_028C80_TILE_MAX(tex->cmask_slice_tile_max);
> +surf->cb_color_info |= S_028C70_FAST_CLEAR(1);
> +}
> +}
> +
>  static bool can_fast_clear_color(struct pipe_context *ctx)
>  {
> struct r600_context *rctx = (struct r600_context *)ctx;
> @@ -445,10 +468,6 @@ static bool can_fast_clear_color(struct pipe_context 
> *ctx)
> for (i = 0; i < fb->nr_cbufs;

[Mesa-dev] [PATCH 0/9] i965/gen7 geometry shader support, series 4

2013-09-09 Thread Paul Berry
This is the next installment of geometry shader support for i965 gen7.
Patches 1-2 are a rewrite of patches 21-22 of series 3 (confusingly
called "Initial geometry shader support, part 2" on the mailing
list--sorry about that), modified according to Ken's comments.  Once
these two patches land, i965/gen7 users should be able to try out
geometry shaders by setting the environment variables:

MESA_GL_VERSION_OVERRIDE=3.2
MESA_GLSL_VERSION_OVERRIDE=150

Patches 3-9 implement EndPrimitive() functionality.  This is tricky on
gen7 since all geometry shader output vertices are buffered in the URB
until the thread terminates, and then they are all processed as one
unit by the fixed function pipeline.  The hardware requires us to keep
track of when EndPrimitive() was called relative to EmitVertex(), and
populate a bitfield at the top of the URB with that information.  The
bitfield has one bit per vertex, indicating 0 if EndPrimitive() wasn't
called after emitting that vertex, 1 if it was.

It's difficult in GEN assembly to address a bitfield whose size is
greater than 32 bits, so we prepare 32 EndPrimitive bits at a time,
and write them out to the URB as soon as each batch of 32 bits is
completed.  This requires a little extra bookkeeping, but IMHO it's
much less work than if we'd tried to maintain all 256 bits in
registers and output them at the end.  Also, an advantage of the "32
bits at a time" scheme is that most of the extra bookkeeping goes away
for the common case of geometry shaders that set max_vertices <= 32.

[PATCH 1/9] i965/gen7: Extract a function for setting up a shader stage's 
constants.
[PATCH 2/9] i965/gs: Add a state atom to set up geometry shader state.
[PATCH 3/9] glsl: During linking, record whether a GS uses EndPrimitive().
[PATCH 4/9] i965/gs: Set control data header size/format appropriately for 
EndPrimitive().
[PATCH 5/9] i965/gen7: Allow URB_WRITE channel masks to be used.
[PATCH 6/9] i965/gen7: Add the ability to send URB_WRITE_OWORD messages.
[PATCH 7/9] i965/gs: Add opcodes needed for EndPrimitive().
[PATCH 8/9] i965/vec4: Add the ability to emit opcodes with just a dst register.
[PATCH 9/9] i965/gs: implement EndPrimitive() functionality in the visitor.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] i965/gen7: Extract a function for setting up a shader stage's constants.

2013-09-09 Thread Paul Berry
This will allow us to reuse some code when setting up the geometry
shader stage.
---
 src/mesa/drivers/dri/i965/brw_state.h |  6 +++
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 61 ++-
 2 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 22e4a61..4c4a536 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -240,6 +240,12 @@ brw_vec4_upload_binding_table(struct brw_context *brw,
   struct brw_stage_state *stage_state,
   const struct brw_vec4_prog_data *prog_data);
 
+/* gen7_vs_state.c */
+void
+gen7_upload_constant_state(struct brw_context *brw,
+   const struct brw_stage_state *stage_state,
+   bool active, unsigned opcode);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 6e72e8f..4fd1913 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -29,6 +29,40 @@
 #include "program/prog_statevars.h"
 #include "intel_batchbuffer.h"
 
+
+void
+gen7_upload_constant_state(struct brw_context *brw,
+   const struct brw_stage_state *stage_state,
+   bool active, unsigned opcode)
+{
+   if (!active || stage_state->push_const_size == 0) {
+  /* Disable the push constant buffers. */
+  BEGIN_BATCH(7);
+  OUT_BATCH(opcode << 16 | (7 - 2));
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  ADVANCE_BATCH();
+   } else {
+  BEGIN_BATCH(7);
+  OUT_BATCH(opcode << 16 | (7 - 2));
+  OUT_BATCH(stage_state->push_const_size);
+  OUT_BATCH(0);
+  /* Pointer to the constant buffer.  Covered by the set of state flags
+   * from gen6_prepare_wm_contants
+   */
+  OUT_BATCH(stage_state->push_const_offset | GEN7_MOCS_L3);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  OUT_BATCH(0);
+  ADVANCE_BATCH();
+   }
+}
+
+
 static void
 upload_vs_state(struct brw_context *brw)
 {
@@ -52,31 +86,8 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(stage_state->sampler_offset);
ADVANCE_BATCH();
 
-   if (stage_state->push_const_size == 0) {
-  /* Disable the push constant buffers. */
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   } else {
-  BEGIN_BATCH(7);
-  OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
-  OUT_BATCH(stage_state->push_const_size);
-  OUT_BATCH(0);
-  /* Pointer to the VS constant buffer.  Covered by the set of
-   * state flags from gen6_prepare_wm_contants
-   */
-  OUT_BATCH(stage_state->push_const_offset | GEN7_MOCS_L3);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  OUT_BATCH(0);
-  ADVANCE_BATCH();
-   }
+   gen7_upload_constant_state(brw, stage_state, true /* active */,
+  _3DSTATE_CONSTANT_VS);
 
/* Use ALT floating point mode for ARB vertex programs, because they
 * require 0^0 == 1.
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] gallium: add PIPE_CAP_MIXED_FRAMEBUFFER_SIZES

2013-09-09 Thread Roland Scheidegger
Ok I guess it makes sense to keep PIPE_CAP_MIXED_COLORBUFFER_FORMATS
then in addition to the new cap bits (as at least r300 should be able to
benefit from it).

Roland

Am 09.09.2013 15:01, schrieb Marek Olšák:
> On r300, colorbuffers can have different pitches, therefore each of
> them can be of arbitrary size. Other than that, there is a separate
> framebuffer width and height state, which applies to all colorbuffers
> (it's usually set to the minimum of all widths and heights as in
> OpenGL).
> 
> r300 doesn't support mixed formats (r500 does), but it does support
> mixed sizes and various other features like MSAA.
> 
> PIPE_CAP_MIXED_COLORBUFFER_FORMATS doesn't affect exposed extensions,
> but it affects FBO completeness.
> 
> Marek
> 
> On Mon, Sep 9, 2013 at 2:36 PM, Roland Scheidegger  wrote:
>> Am 09.09.2013 13:55, schrieb Erik Faye-Lund:
>>> On Mon, Sep 9, 2013 at 1:31 PM, Roland Scheidegger  
>>> wrote:
 I'm not really convinced of this idea.
 There's already PIPE_CAP_MIXED_COLORBUFFER_FORMATS which is sort of
 similar - a "requirement" of ARB_fbo, but it isn't used to determine
 support of ARB_fbo or not (my guess is drivers want to advertize ARB_fbo
 even if they can't do it, and ARB_fbo doesn't really have a hard
 requirement for anything as you always can say "no" to fb supported).
 So I can't see why not supporting different width/height is treated
 different to not supporting different formats.
>>>
>>> Actually, ARB_framebuffer_object does have a hard requirement on
>>> rendering to differently sized attachments. FRAMEBUFFER_UNSUPPORTED is
>>> only allowed for format-issues. EXT_framebuffer_objects can still be
>>> supported on hardware that cannot meet this requirement
>>>
>>
>> Ah you're right. For some reason I thought it would be permitted for a
>> driver to return unsupported framebuffer for any reason.
>> So this makes sense then.
>> (Though I'm wondering if we really need the
>> PIPE_CAP_MIXED_COLORBUFFER_FORMATS then since I don't think there's any
>> driver which can do different width/height hence support ARB_fbo but not
>> actually support mixed formats, r300 and softpipe might be candidates
>> though I don't know why the latter wouldn't support mixed formats as it
>> claims not to and have no idea if the former can support different
>> width/height.)
>>
>> Roland
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] i965/gs: Add a state atom to set up geometry shader state.

2013-09-09 Thread Paul Berry
v2: Do not attempt to share the code that uploads
3DSTATE_BINDING_TABLE_POINTERS_GS, 3DSTATE_SAMPLER_STATE_POINTERS_GS,
or 3DSTATE_GS with VS.
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_defines.h  |   7 ++
 src/mesa/drivers/dri/i965/brw_state.h|   2 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |   2 +
 src/mesa/drivers/dri/i965/gen7_disable.c |  33 --
 src/mesa/drivers/dri/i965/gen7_gs_state.c| 144 +++
 6 files changed, 156 insertions(+), 33 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen7_gs_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 5299d0d..07c1053 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -122,6 +122,7 @@ i965_FILES = \
gen7_blorp.cpp \
gen7_clip_state.c \
gen7_disable.c \
+gen7_gs_state.c \
gen7_misc_state.c \
gen7_sampler_state.c \
gen7_sf_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index d5a12f1..0406c4d 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1328,15 +1328,22 @@ enum brw_message_target {
 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754  (0 << 16)
 # define GEN6_GS_FLOATING_POINT_MODE_ALT   (1 << 16)
 /* DW4 */
+# define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT  23
+# define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
 # define GEN6_GS_URB_READ_LENGTH_SHIFT 11
 # define GEN7_GS_INCLUDE_VERTEX_HANDLES(1 << 10)
 # define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT   4
 # define GEN6_GS_DISPATCH_START_GRF_SHIFT  0
 /* DW5 */
 # define GEN6_GS_MAX_THREADS_SHIFT 25
+# define HSW_GS_MAX_THREADS_SHIFT  24
+# define GEN7_GS_DISPATCH_MODE_SINGLE  (0 << 11)
+# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE   (1 << 11)
+# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
 # define GEN6_GS_STATISTICS_ENABLE (1 << 10)
 # define GEN6_GS_SO_STATISTICS_ENABLE  (1 << 9)
 # define GEN6_GS_RENDERING_ENABLE  (1 << 8)
+# define GEN7_GS_INCLUDE_PRIMITIVE_ID  (1 << 4)
 # define GEN7_GS_ENABLE(1 << 0)
 /* DW6 */
 # define GEN6_GS_REORDER   (1 << 30)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 4c4a536..04c1a97 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -116,6 +116,8 @@ extern const struct brw_tracked_state gen7_depthbuffer;
 extern const struct brw_tracked_state gen7_cc_viewport_state_pointer;
 extern const struct brw_tracked_state gen7_clip_state;
 extern const struct brw_tracked_state gen7_disable_stages;
+extern const struct brw_tracked_state gen7_gs_push_constants;
+extern const struct brw_tracked_state gen7_gs_state;
 extern const struct brw_tracked_state gen7_ps_state;
 extern const struct brw_tracked_state gen7_push_constant_space;
 extern const struct brw_tracked_state gen7_sbe_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index b6a6a0a..8f21f06 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -196,6 +196,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
&gen6_depth_stencil_state,  /* must do before cc unit */
 
&gen6_vs_push_constants, /* Before vs_state */
+   &gen7_gs_push_constants, /* Before gs_state */
&gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
 
/* Surface state setup.  Must come before the VS/WM unit.  The binding
@@ -220,6 +221,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
 
&gen7_disable_stages,
&gen7_vs_state,
+   &gen7_gs_state,
&gen7_sol_state,
&gen7_clip_state,
&gen7_sbe_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c 
b/src/mesa/drivers/dri/i965/gen7_disable.c
index 860aa95..98d115b 100644
--- a/src/mesa/drivers/dri/i965/gen7_disable.c
+++ b/src/mesa/drivers/dri/i965/gen7_disable.c
@@ -29,39 +29,6 @@
 static void
 disable_stages(struct brw_context *brw)
 {
-   assert(!brw->ff_gs.prog_active);
-
-   /* Disable the Geometry Shader (GS) Unit */
-   BEGIN_BATCH(7);
-   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(7);
-   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
-   OUT_BATCH(0); /* prog_bo */
-   OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
-(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
-   OUT_BATCH

[Mesa-dev] [PATCH 3/9] glsl: During linking, record whether a GS uses EndPrimitive().

2013-09-09 Thread Paul Berry
This information will be useful in the i965 back end, since we can
save some compilation effort if we know from the outset that the
shader never calls EndPrimitive().
---
 src/glsl/linker.cpp   | 31 +++
 src/mesa/main/mtypes.h|  2 ++
 src/mesa/main/shaderapi.c |  1 +
 3 files changed, 34 insertions(+)

diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
index 8430096..f10303e 100644
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -249,6 +249,33 @@ public:
 };
 
 
+/**
+ * Visitor that determines whether or not a shader uses ir_end_primitive.
+ */
+class find_end_primitive_visitor : public ir_hierarchical_visitor {
+public:
+   find_end_primitive_visitor()
+  : found(false)
+   {
+  /* empty */
+   }
+
+   virtual ir_visitor_status visit(ir_end_primitive *)
+   {
+  found = true;
+  return visit_stop;
+   }
+
+   bool end_primitive_found()
+   {
+  return found;
+   }
+
+private:
+   bool found;
+};
+
+
 void
 linker_error(gl_shader_program *prog, const char *fmt, ...)
 {
@@ -517,6 +544,10 @@ validate_geometry_shader_executable(struct 
gl_shader_program *prog,
 
analyze_clip_usage("geometry", prog, shader, &prog->Geom.UsesClipDistance,
   &prog->Geom.ClipDistanceArraySize);
+
+   find_end_primitive_visitor end_primitive;
+   end_primitive.run(shader->ir);
+   prog->Geom.UsesEndPrimitive = end_primitive.end_primitive_found();
 }
 
 
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index ef16c59..18a6f66 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1931,6 +1931,7 @@ struct gl_geometry_program
GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
GLboolean UsesClipDistance;
+   GLboolean UsesEndPrimitive;
 };
 
 
@@ -2364,6 +2365,7 @@ struct gl_shader_program
   GLboolean UsesClipDistance;
   GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, or
  0 if not present. */
+  GLboolean UsesEndPrimitive;
} Geom;
 
/** Vertex shader state */
diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
index 4fe9d9c..a2386fb 100644
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1872,6 +1872,7 @@ _mesa_copy_linked_program_data(gl_shader_type type,
   dst_gp->InputType = src->Geom.InputType;
   dst_gp->OutputType = src->Geom.OutputType;
   dst_gp->UsesClipDistance = src->Geom.UsesClipDistance;
+  dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
}
   break;
default:
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] i965/gen7: Allow URB_WRITE channel masks to be used.

2013-09-09 Thread Paul Berry
Previously, brw_urb_WRITE() would unconditionally override the channel
masks in the URB_WRITE message to 0xff (indicating that all channels
should be written to the URB).

In order to support geometry shader EndPrimitive functionality, we'll
need the ability to set the channel masks programatically, so that we
can output just 32 of the control data bits at a time.  So this patch
adds a flag that will prevent brw_urb_WRITE() from overriding them.
---
 src/mesa/drivers/dri/i965/brw_eu.h  | 6 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 6ac1c68..4d47cdd 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -258,6 +258,12 @@ enum brw_urb_write_flags {
BRW_URB_WRITE_PER_SLOT_OFFSET = 0x10,
 
/**
+* Indicates that the channel masks in the URB_WRITE message header should
+* not be overridden to 0xff (gen == 7).
+*/
+   BRW_URB_WRITE_USE_CHANNEL_MASKS = 0x20,
+
+   /**
 * Convenient combination of flags: end the thread while simultaneously
 * marking the given URB entry as complete.
 */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f26c913..0995a9a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -,7 +,7 @@ void brw_urb_WRITE(struct brw_compile *p,
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
-   if (brw->gen == 7) {
+   if (brw->gen == 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
   /* Enable Channel Masks in the URB_WRITE_HWORD message header */
   brw_push_insn_state(p);
   brw_set_access_mode(p, BRW_ALIGN_1);
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] i965/gs: Set control data header size/format appropriately for EndPrimitive().

2013-09-09 Thread Paul Berry
The gen7 geometry shader uses a "control data header" at the beginning
of the output URB entry to store either

(a) flag bits (1 bit/vertex) indicating whether EndPrimitive() was
called after each vertex, or

(b) stream ID bits (2 bits/vertex) indicating which stream each vertex
should be sent to (when multiple transform feedback streams are in
use).

Fortunately, OpenGL only requires separate streams to be supported
when the output type is points, and EndPrimitive() only has an effect
when the input type is line_strip or triangle_strip, so it's not a
problem that these two uses of the control data header are mutually
exclusive.

This patch modifies do_vec4_gs_prog() to determine the correct
hardware settings for configuring the control data header, and
modifies upload_gs_state() to propagate these settings to the
hardware.

In addition, it modifies do_vec4_gs_prog() to ensure that the output
URB entry is large enough to contain both the output vertices *and*
the control data header.

Finally, it modifies vec4_gs_visitor so that it accounts for the size
of the control data header when computing the offset within the URB
where output vertex data should be stored.
---
 src/mesa/drivers/dri/i965/brw_context.h   | 14 ++
 src/mesa/drivers/dri/i965/brw_defines.h   |  4 +++
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   | 33 +++
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  3 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  2 +-
 src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++
 7 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 57f086b..c566bba 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -548,6 +548,20 @@ struct brw_gs_prog_data
unsigned output_vertex_size_hwords;
 
unsigned output_topology;
+
+   /**
+* Size of the control data (cut bits or StreamID bits), in hwords (32
+* bytes).  0 if there is no control data.
+*/
+   unsigned control_data_header_size_hwords;
+
+   /**
+* Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+* if the control data is StreamID bits, or
+* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+* Ignored if control_data_header_size is 0.
+*/
+   unsigned control_data_format;
 };
 
 /** Number of texture sampler units */
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 0406c4d..6db2570 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1337,6 +1337,10 @@ enum brw_message_target {
 /* DW5 */
 # define GEN6_GS_MAX_THREADS_SHIFT 25
 # define HSW_GS_MAX_THREADS_SHIFT  24
+# define GEN7_GS_CONTROL_DATA_FORMAT_SHIFT 24
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
+# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT20
 # define GEN7_GS_DISPATCH_MODE_SINGLE  (0 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE   (1 << 11)
 # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 7ab03ac..f67ae2b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw,
c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
param_count);
 
+   if (gp->program.OutputType == GL_POINTS) {
+  /* When the output type is points, the geometry shader may output data
+   * to multiple streams, and EndPrimitive() has no effect.  So we
+   * configure the hardware to interpret the control data as stream ID.
+   */
+  c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+  /* However, StreamID is not yet supported, so we output zero bits of
+   * control data per vertex.
+   */
+  c.control_data_bits_per_vertex = 0;
+   } else {
+  /* When the output type is triangle_strip or line_strip, EndPrimitive()
+   * may be used to terminate the current strip and start a new one
+   * (similar to primitive restart), and outputting data to multiple
+   * streams is not supported.  So we configure the hardware to interpret
+   * the control data as EndPrimitive information (a.k.a. "cut bits").
+   */
+  c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+  /* We only need to output control data if the shader actually calls
+   * EndPrimitive().
+   */
+  c.control_data_bits_p

[Mesa-dev] [PATCH 8/9] i965/vec4: Add the ability to emit opcodes with just a dst register.

2013-09-09 Thread Paul Berry
This is needed for GS_OPCODE_PREPARE_CHANNEL_MASKS.
---
 src/mesa/drivers/dri/i965/brw_vec4.h   | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
 2 files changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index cba5cd4..f0ab53d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -394,6 +394,8 @@ public:
 
vec4_instruction *emit(enum opcode opcode);
 
+   vec4_instruction *emit(enum opcode opcode, dst_reg dst);
+
vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
 
vec4_instruction *emit(enum opcode opcode, dst_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 004a884..cdc6ba0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -83,6 +83,12 @@ vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg 
src0)
 }
 
 vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst)
+{
+   return emit(new(mem_ctx) vec4_instruction(this, opcode, dst));
+}
+
+vec4_instruction *
 vec4_visitor::emit(enum opcode opcode)
 {
return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] i965/gs: Add opcodes needed for EndPrimitive().

2013-09-09 Thread Paul Berry
---
 src/mesa/drivers/dri/i965/brw_defines.h | 21 
 src/mesa/drivers/dri/i965/brw_shader.cpp|  4 ++
 src/mesa/drivers/dri/i965/brw_vec4.h|  2 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 81 +
 4 files changed, 108 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 4742103..7b53c68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -847,6 +847,27 @@ enum opcode {
 * scratch reads and writes to operate correctly.
 */
GS_OPCODE_SET_DWORD_2_IMMED,
+
+   /**
+* Prepare the dst register for storage in the "Channel Mask" fields of a
+* URB_WRITE message header.
+*
+* DWORD 4 of dst is shifted left by 4 bits, so that later,
+* GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
+* final channel mask.
+*/
+   GS_OPCODE_PREPARE_CHANNEL_MASKS,
+
+   /**
+* Set the "Channel Mask" fields of a URB_WRITE message header.
+*
+* - dst is the MRF containing the message header.
+*
+* - src.x is the channel mask, as prepared by
+*   GS_OPCODE_PREPARE_CHANNEL_MASKS.  DWORDs 0 and 4 are OR'ed together to
+*   form the final channel mask.
+*/
+   GS_OPCODE_SET_CHANNEL_MASKS,
 };
 
 #define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index e7dbdbe..53364a5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -507,6 +507,10 @@ brw_instruction_name(enum opcode op)
   return "set_vertex_count";
case GS_OPCODE_SET_DWORD_2_IMMED:
   return "set_dword_2_immed";
+   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
+  return "prepare_channel_masks";
+   case GS_OPCODE_SET_CHANNEL_MASKS:
+  return "set_channel_masks";
 
default:
   /* Yes, this leaks.  It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index c5101d3..cba5cd4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -610,6 +610,8 @@ private:
void generate_gs_set_vertex_count(struct brw_reg dst,
  struct brw_reg src);
void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src);
+   void generate_gs_prepare_channel_masks(struct brw_reg dst);
+   void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src);
void generate_oword_dual_block_offsets(struct brw_reg m1,
  struct brw_reg index);
void generate_scratch_write(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index bf04bd9..12e1b50 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -516,6 +516,79 @@ vec4_generator::generate_gs_set_dword_2_immed(struct 
brw_reg dst,
 }
 
 void
+vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
+{
+   /* We want to left shift just DWORD 4 (the x component belonging to the
+* second geometry shader invocation) by 4 bits.  So generate the
+* instruction:
+*
+* shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
+*/
+   dst = suboffset(vec1(dst), 4);
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_SHL(p, dst, dst, brw_imm_ud(4));
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
+  struct brw_reg src)
+{
+   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
+* Header: M0.5):
+*
+* 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
+*
+*When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1
+*DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
+*Vertex 0 DATA[7].  This bit is ANDed with the corresponding
+*channel enable to determine the final channel enable.  For the
+*URB_READ_OWORD & URB_READ_HWORD messages, when final channel
+*enable is 1 it indicates that Vertex 1 DATA [3] will be included
+*in the writeback message.  For the URB_WRITE_OWORD &
+*URB_WRITE_HWORD messages, when final channel enable is 1 it
+*indicates that Vertex 1 DATA [3] will be written to the surface.
+*
+*0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included
+*1: Vertex DATA [3] / Vertex 0 DATA[7] channel included
+*
+* 14 Vertex 1 DATA [2] Channel Mask
+* 13 Vertex 1 DATA [1] Channel Mask
+* 12 Vertex 1 DATA [0] Channel Mask
+* 11 Vertex 0 DATA [3] Channel Mask
+*

[Mesa-dev] [PATCH 9/9] i965/gs: implement EndPrimitive() functionality in the visitor.

2013-09-09 Thread Paul Berry
According to GLSL, the shader may call EndPrimitive() at any point
during its execution, causing the line or triangle strip currently
being output to be terminated and a new strip to be begun.

This is implemented in gen7 hardware by using one control data bit per
vertex, to indicate whether EndPrimitive() was called after that
vertex was emitted.

In order to make this work without sacrificing too much efficiency, we
accumulate 32 control data bits at a time in a GRF.  When we have
accumulated 32 bits (or when the shader terminates), we output them to
the appropriate DWORD in the control data header and reset the
accumulator to 0.

We have to take special care to make sure that EndPrimitive() calls
that occur prior to the first vertex have no effect.

Since geometry shaders that output a large number of vertices are
likely to be rare, an optimization kicks in if max_vertices <= 32.  In
this case, we know that we can wait until the end of shader execution
before any control data bits need to be output.

I've tried to write the code in such a way that in the future, we can
easily adapt it to output stream ID bits (which are two bits/vertex
instead of one).

Fixes piglit tests "spec/glsl-1.50/glsl-1.50-geometry-end-primitive *".
---
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 226 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |   2 +
 2 files changed, 227 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 37cde64..1a8ce1a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -135,6 +135,23 @@ vec4_gs_visitor::emit_prolog()
vec4_instruction *inst = emit(MOV(dst_reg(this->vertex_count), 0u));
inst->force_writemask_all = true;
 
+   if (c->control_data_header_size_bits > 0) {
+  /* Create a virtual register to hold the current set of control data
+   * bits.
+   */
+  this->control_data_bits = src_reg(this, glsl_type::uint_type);
+
+  /* If we're outputting more than 32 control data bits, then EmitVertex()
+   * will set control_data_bits to 0 after emitting the first vertex.
+   * Otherwise, we need to initialize it to 0 here.
+   */
+  if (c->control_data_header_size_bits <= 32) {
+ this->current_annotation = "initialize control data bits";
+ inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst->force_writemask_all = true;
+  }
+   }
+
this->current_annotation = NULL;
 }
 
@@ -150,6 +167,16 @@ vec4_gs_visitor::emit_program_code()
 void
 vec4_gs_visitor::emit_thread_end()
 {
+   if (c->control_data_header_size_bits > 0) {
+  /* During shader execution, we only ever call emit_control_data_bits()
+   * just prior to outputting a vertex.  Therefore, the control data bits
+   * corresponding to the most recently output vertex still need to be
+   * emitted.
+   */
+  current_annotation = "thread end: emit control data bits";
+  emit_control_data_bits();
+   }
+
/* MRF 0 is reserved for the debugger, so start with message header
 * in MRF 1.
 */
@@ -224,6 +251,124 @@ 
vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
 }
 
 
+/**
+ * Write out a batch of 32 control data bits from the control_data_bits
+ * register to the URB.
+ *
+ * The current value of the vertex_count register determines which DWORD in
+ * the URB receives the control data bits.  The control_data_bits register is
+ * assumed to contain the correct data for the vertex that was most recently
+ * output, and all previous vertices that share the same DWORD.
+ *
+ * This function takes care of ensuring that if no vertices have been output
+ * yet, no control bits are emitted.
+ */
+void
+vec4_gs_visitor::emit_control_data_bits()
+{
+   assert(c->control_data_bits_per_vertex != 0);
+
+   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
+* granularity, we need to use two tricks to ensure that the batch of 32
+* control data bits is written to the appropriate DWORD in the URB.  To
+* select which vec4 we are writing to, we use the "slot {0,1} offset"
+* fields of the message header.  To select which DWORD in the vec4 we are
+* writing to, we use the channel mask fields of the message header.  To
+* avoid punalizing geometry shaders that emit a small number of vertices
+* with extra bookkeeping, we only do each of these tricks when
+* c->prog_data.control_data_header_size_bits is large enough to make it
+* necessary.
+*
+* Note: this means that if we're outputting just a single DWORD of control
+* data bits, we'll actually replicate it four times since we won't do any
+* channel masking.  But that's not a problem since in this case the
+* hardware only pays attention to the first DWORD.
+*/
+   enum brw_urb_write_flags urb_write_flags 

Re: [Mesa-dev] [PATCH 9/9] i965/gs: implement EndPrimitive() functionality in the visitor.

2013-09-09 Thread Ian Romanick
On 09/09/2013 10:20 AM, Paul Berry wrote:
> According to GLSL, the shader may call EndPrimitive() at any point
> during its execution, causing the line or triangle strip currently
> being output to be terminated and a new strip to be begun.
> 
> This is implemented in gen7 hardware by using one control data bit per
> vertex, to indicate whether EndPrimitive() was called after that
> vertex was emitted.
> 
> In order to make this work without sacrificing too much efficiency, we
> accumulate 32 control data bits at a time in a GRF.  When we have
> accumulated 32 bits (or when the shader terminates), we output them to
> the appropriate DWORD in the control data header and reset the
> accumulator to 0.
> 
> We have to take special care to make sure that EndPrimitive() calls
> that occur prior to the first vertex have no effect.

Does this cover the general case of EndPrimitive without a preceeding
EmitVertex?  Something like:

 EmitVertex();
 EndPrimitive();
 EndPrimitive();

And... do we have a test for that? :)

> Since geometry shaders that output a large number of vertices are
> likely to be rare, an optimization kicks in if max_vertices <= 32.  In
> this case, we know that we can wait until the end of shader execution
> before any control data bits need to be output.
> 
> I've tried to write the code in such a way that in the future, we can
> easily adapt it to output stream ID bits (which are two bits/vertex
> instead of one).
> 
> Fixes piglit tests "spec/glsl-1.50/glsl-1.50-geometry-end-primitive *".
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 226 
> +-
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |   2 +
>  2 files changed, 227 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> index 37cde64..1a8ce1a 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
> @@ -135,6 +135,23 @@ vec4_gs_visitor::emit_prolog()
> vec4_instruction *inst = emit(MOV(dst_reg(this->vertex_count), 0u));
> inst->force_writemask_all = true;
>  
> +   if (c->control_data_header_size_bits > 0) {
> +  /* Create a virtual register to hold the current set of control data
> +   * bits.
> +   */
> +  this->control_data_bits = src_reg(this, glsl_type::uint_type);
> +
> +  /* If we're outputting more than 32 control data bits, then 
> EmitVertex()
> +   * will set control_data_bits to 0 after emitting the first vertex.
> +   * Otherwise, we need to initialize it to 0 here.
> +   */
> +  if (c->control_data_header_size_bits <= 32) {
> + this->current_annotation = "initialize control data bits";
> + inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
> + inst->force_writemask_all = true;
> +  }
> +   }
> +
> this->current_annotation = NULL;
>  }
>  
> @@ -150,6 +167,16 @@ vec4_gs_visitor::emit_program_code()
>  void
>  vec4_gs_visitor::emit_thread_end()
>  {
> +   if (c->control_data_header_size_bits > 0) {
> +  /* During shader execution, we only ever call emit_control_data_bits()
> +   * just prior to outputting a vertex.  Therefore, the control data bits
> +   * corresponding to the most recently output vertex still need to be
> +   * emitted.
> +   */
> +  current_annotation = "thread end: emit control data bits";
> +  emit_control_data_bits();
> +   }
> +
> /* MRF 0 is reserved for the debugger, so start with message header
>  * in MRF 1.
>  */
> @@ -224,6 +251,124 @@ 
> vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
>  }
>  
>  
> +/**
> + * Write out a batch of 32 control data bits from the control_data_bits
> + * register to the URB.
> + *
> + * The current value of the vertex_count register determines which DWORD in
> + * the URB receives the control data bits.  The control_data_bits register is
> + * assumed to contain the correct data for the vertex that was most recently
> + * output, and all previous vertices that share the same DWORD.
> + *
> + * This function takes care of ensuring that if no vertices have been output
> + * yet, no control bits are emitted.
> + */
> +void
> +vec4_gs_visitor::emit_control_data_bits()
> +{
> +   assert(c->control_data_bits_per_vertex != 0);
> +
> +   /* Since the URB_WRITE_OWORD message operates with 128-bit (vec4 sized)
> +* granularity, we need to use two tricks to ensure that the batch of 32
> +* control data bits is written to the appropriate DWORD in the URB.  To
> +* select which vec4 we are writing to, we use the "slot {0,1} offset"
> +* fields of the message header.  To select which DWORD in the vec4 we are
> +* writing to, we use the channel mask fields of the message header.  To
> +* avoid punalizing geometry shaders that emit a small number of vertices
> +* with extra bookkeeping, we only do each o

[Mesa-dev] [PATCH 6/9] i965/gen7: Add the ability to send URB_WRITE_OWORD messages.

2013-09-09 Thread Paul Berry
Previously, brw_urb_WRITE() would always generate a URB_WRITE_HWORD
message, we always wanted to write data to the URB in pairs of varying
slots or larger (an HWORD is 32 bytes, which is 2 varying slots).

In order to support geometry shader EndPrimitive functionality, we'll
need the ability to write to just a single OWORD (16 byte) slot, since
we'll only be outputting 32 of the control data bits at a time.  So
this patch adds a flag that will cause brw_urb_WRITE to generate a
URB_WRITE_OWORD message.
---
 src/mesa/drivers/dri/i965/brw_defines.h | 3 ++-
 src/mesa/drivers/dri/i965/brw_eu.h  | 8 
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 7 ++-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 6db2570..4742103 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1172,7 +1172,8 @@ enum brw_message_target {
 #define BRW_MATH_DATA_VECTOR  0
 #define BRW_MATH_DATA_SCALAR  1
 
-#define BRW_URB_OPCODE_WRITE  0
+#define BRW_URB_OPCODE_WRITE_HWORD  0
+#define BRW_URB_OPCODE_WRITE_OWORD  1
 
 #define BRW_URB_SWIZZLE_NONE  0
 #define BRW_URB_SWIZZLE_INTERLEAVE1
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 4d47cdd..720bc74 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -264,6 +264,14 @@ enum brw_urb_write_flags {
BRW_URB_WRITE_USE_CHANNEL_MASKS = 0x20,
 
/**
+* Indicates that the data should be sent to the URB using the
+* URB_WRITE_OWORD message rather than URB_WRITE_HWORD (gen == 7).  This
+* causes offsets to be interpreted as multiples of an OWORD instead of an
+* HWORD, and only allows one OWORD to be written.
+*/
+   BRW_URB_WRITE_OWORD = 0x40,
+
+   /**
 * Convenient combination of flags: end the thread while simultaneously
 * marking the given URB entry as complete.
 */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0995a9a..9b95101 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -527,7 +527,12 @@ static void brw_set_urb_message( struct brw_compile *p,
  msg_length, response_length, true,
   flags & BRW_URB_WRITE_EOT);
if (brw->gen == 7) {
-  insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
+  if (flags & BRW_URB_WRITE_OWORD) {
+ assert(msg_length == 2); /* header + one OWORD of data */
+ insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_OWORD;
+  } else {
+ insn->bits3.urb_gen7.opcode = BRW_URB_OPCODE_WRITE_HWORD;
+  }
   insn->bits3.urb_gen7.offset = offset;
   assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
   insn->bits3.urb_gen7.swizzle_control = swizzle_control;
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 9/9] i965/gs: implement EndPrimitive() functionality in the visitor.

2013-09-09 Thread Ian Romanick
On 09/09/2013 11:31 AM, Paul Berry wrote:
> On 9 September 2013 09:18, Ian Romanick  > wrote:
> 
> On 09/09/2013 10:20 AM, Paul Berry wrote:
> > According to GLSL, the shader may call EndPrimitive() at any point
> > during its execution, causing the line or triangle strip currently
> > being output to be terminated and a new strip to be begun.
> >
> > This is implemented in gen7 hardware by using one control data bit per
> > vertex, to indicate whether EndPrimitive() was called after that
> > vertex was emitted.
> >
> > In order to make this work without sacrificing too much efficiency, we
> > accumulate 32 control data bits at a time in a GRF.  When we have
> > accumulated 32 bits (or when the shader terminates), we output them to
> > the appropriate DWORD in the control data header and reset the
> > accumulator to 0.
> >
> > We have to take special care to make sure that EndPrimitive() calls
> > that occur prior to the first vertex have no effect.
> 
> Does this cover the general case of EndPrimitive without a preceeding
> EmitVertex?  Something like:
> 
>  EmitVertex();
>  EndPrimitive();
>  EndPrimitive();
> 
> And... do we have a test for that? :)
> 
> Yes, that should work fine.  What we do in EndPrimitive() is set the
> appropriate bit in the bitfield, so calling EndPrimitive() again should
> just redundantly set a bit that's already set.
> 
> I don't think we have a test for it.  I'll add that to my list.

Ah, that makes sense.

FWIW, the series is

Reviewed-by: Ian Romanick 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/15] i965/gen6+: Support 128 varying components.

2013-09-09 Thread Paul Berry
On 9 September 2013 09:51, Ian Romanick  wrote:

> On 09/03/2013 06:18 PM, Paul Berry wrote:
> > GL 3.2 requires us to support 128 varying components for geometry
> > shader outputs and fragment shader inputs, and 64 varying components
> > otherwise.  But there's no hardware limitation that restricts us to 64
> > varying components, and core Mesa doesn't currently allow different
> > stages to have different maximum values, so I've gone ahead and
> > enabled 128 varying components for all stages.  This has the advantage
>
> I was just looking at this today while working on the standalone
> compiler.  To use the standalone compiler for shader validation, we want
> to advertise the minimums required by the spec.  To do that, we need to
> be able to track the input/output limits separately.  Since the varying
> limit changed from 64 to 60, but the vertex shader output limit is still
> 64 (where gl_Position is counted?), this may be annoying to implement
> fully.
>
> For the standalone compiler work, I'll add some of this plumbing.  That
> may impact some of your changes, depending on the order things land.
> Since my patches depend on Ken's built-in rework, yours will almost
> surely go first.
>
> > of increased test coverage, since piglit already has a number of tests
> > to validate that the maximum advertised number of varying components
> > can be exchanged between VS and FS.  I've also gone ahead and
> > increased the limit for gen6 as well as gen7, since it required very
> > little extra work.
> >
> > Previously, on gen6+, we relied on the SF/SBE stage of the pipeline to
> > reorder the outputs from the GS (or VS) to match the input ordering
> > required by the FS.  This allowed us to determine the order of FS
> > inputs solely based on the FS, so we avoided recompiles when separate
> > shader objects were in use.  But there's a problem with that: the
> > SF/SBE stage can't arbitrarily reorder more than 16 VUE slots (1 slot
> > = 4 varying components).  To avoid introducing additional recompiles
> > with previously-supported shaders, I've taken a hybrid approach to
> > choosing the FS input ordering: if the FS uses 16 or fewer input
> > varying slots, then it orders them solely based on its own
> > requirements.  If it uses more than 16 input varying slots, then it
> > orders them according to the GS (or VS) output VUE map, so that the
> > SF/SBE stage doesn't have to do any reordering.
> >
> > Patches 1-3 modify the FS so that it exposes the order of input
> > varyings it needs via prog_data.
> >
> > Patches 4-6 modify the SF/SBE setup so that it consults the FS
> > prog_data when choosing how to re-order varyings (previously, it
> > implicitly assumed an order that happened to match the order the FS
> > was using).
> >
> > Patch 7 is a minor optimization made possible by patches 1-6: now that
> > the SF/SBE setup no longer makes implicit assumptions about the order
> > of the FS inputs, the FS no longer has to have dummy input slots for
> > gl_FragCoord and gl_FrontFacing.
>
> \o/
>
> > Patch 8 tweaks the VUE map slightly so that it is uniquely determined
> > by a single 64-bit bitfield.  This will allow us to store the bitfield
> > in the FS program key rather than the entire VUE map.
> >
> > Patch 9 is a minor optimization made possible by patch 8: now that the
> > VUE map is uniquely determined by a single 64-bit bitfield, we no
> > longer have to store the entire VUE map in the GS program key.
> >
> > Patches 10-11 modify the FS to order its inputs according to the GS
> > (or VS) output VUE map when there are more than 16 input slots in use.
> >
> > Patch 12 adjusts the VS and GS code so that it can output all 32
> > varyings to the VUE, even if it requires more than two URB writes to
> > do so.
> >
> > Patches 13-14 make some minor gen6-specific adjustments to allow for
> > the larger URB entries needed for 32 vayings: the Gen6 transform
> > feedback code sometimes needs to do 2 URB writes instead of 1, and an
> > incorrect assertion in the gen6 URB setup needs to be fixed.
> >
> > Patch 15 increases the value of MaxVarying from 16 to 32 for gen6+.
> >
> > The series is available on branch "increase-max-varyings" of
> > https://github.com/stereotype441/mesa.git.  I've piglit tested it on
> > gen5, gen6, and gen7.
>
> Do we have tests that use more than 16 varying vectors?  Some of the
> generated varying packing tests, right?
>

Yes, we have a number of varying packing tests that exercise this (though
they aren't generated tests, IIRC).  Also,
spec/EXT_transform_feedback/max-varyings and shaders/glsl-max-varyings.


>
> > [PATCH 01/15] i965/fs: Expose "urb_setup" as part of brw_wm_prog_data.
> > [PATCH 02/15] i965/fs: Change brw_wm_prog_data::urb_read_length to
> num_varying_inputs.
> > [PATCH 03/15] i965/fs: Consult brw_wm_prog_data::num_varying_inputs when
> setting up WM state.
> > [PATCH 04/15] i965/sf: Use BRW_SF_URB_ENTRY_READ_OFFSET rather than
> hardcoded values.
> > [PATCH 05/15] i965/sf

Re: [Mesa-dev] [PATCH 3/3] i965: Use brw_stage_state for WM data as well.

2013-09-09 Thread Paul Berry
On 5 September 2013 16:40, Kenneth Graunke  wrote:

> This gets the VS, GS, and PS all using the same data structure.
>
> Signed-off-by: Kenneth Graunke 
>

Series is:

Reviewed-by: Paul Berry 

BTW, Once both this series and my series "i965/gen7 geometry shader
support, series 4" have landed, we'll probably want to modify
upload_ps_state() to make use of gen7_upload_constant_state().  I don't
have a preference which one of us works on that :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/15] i965/gen6+: Support 128 varying components.

2013-09-09 Thread Ian Romanick
On 09/03/2013 06:18 PM, Paul Berry wrote:
> GL 3.2 requires us to support 128 varying components for geometry
> shader outputs and fragment shader inputs, and 64 varying components
> otherwise.  But there's no hardware limitation that restricts us to 64
> varying components, and core Mesa doesn't currently allow different
> stages to have different maximum values, so I've gone ahead and
> enabled 128 varying components for all stages.  This has the advantage

I was just looking at this today while working on the standalone
compiler.  To use the standalone compiler for shader validation, we want
to advertise the minimums required by the spec.  To do that, we need to
be able to track the input/output limits separately.  Since the varying
limit changed from 64 to 60, but the vertex shader output limit is still
64 (where gl_Position is counted?), this may be annoying to implement fully.

For the standalone compiler work, I'll add some of this plumbing.  That
may impact some of your changes, depending on the order things land.
Since my patches depend on Ken's built-in rework, yours will almost
surely go first.

> of increased test coverage, since piglit already has a number of tests
> to validate that the maximum advertised number of varying components
> can be exchanged between VS and FS.  I've also gone ahead and
> increased the limit for gen6 as well as gen7, since it required very
> little extra work.
> 
> Previously, on gen6+, we relied on the SF/SBE stage of the pipeline to
> reorder the outputs from the GS (or VS) to match the input ordering
> required by the FS.  This allowed us to determine the order of FS
> inputs solely based on the FS, so we avoided recompiles when separate
> shader objects were in use.  But there's a problem with that: the
> SF/SBE stage can't arbitrarily reorder more than 16 VUE slots (1 slot
> = 4 varying components).  To avoid introducing additional recompiles
> with previously-supported shaders, I've taken a hybrid approach to
> choosing the FS input ordering: if the FS uses 16 or fewer input
> varying slots, then it orders them solely based on its own
> requirements.  If it uses more than 16 input varying slots, then it
> orders them according to the GS (or VS) output VUE map, so that the
> SF/SBE stage doesn't have to do any reordering.
> 
> Patches 1-3 modify the FS so that it exposes the order of input
> varyings it needs via prog_data.
> 
> Patches 4-6 modify the SF/SBE setup so that it consults the FS
> prog_data when choosing how to re-order varyings (previously, it
> implicitly assumed an order that happened to match the order the FS
> was using).
> 
> Patch 7 is a minor optimization made possible by patches 1-6: now that
> the SF/SBE setup no longer makes implicit assumptions about the order
> of the FS inputs, the FS no longer has to have dummy input slots for
> gl_FragCoord and gl_FrontFacing.

\o/

> Patch 8 tweaks the VUE map slightly so that it is uniquely determined
> by a single 64-bit bitfield.  This will allow us to store the bitfield
> in the FS program key rather than the entire VUE map.
> 
> Patch 9 is a minor optimization made possible by patch 8: now that the
> VUE map is uniquely determined by a single 64-bit bitfield, we no
> longer have to store the entire VUE map in the GS program key.
> 
> Patches 10-11 modify the FS to order its inputs according to the GS
> (or VS) output VUE map when there are more than 16 input slots in use.
> 
> Patch 12 adjusts the VS and GS code so that it can output all 32
> varyings to the VUE, even if it requires more than two URB writes to
> do so.
> 
> Patches 13-14 make some minor gen6-specific adjustments to allow for
> the larger URB entries needed for 32 vayings: the Gen6 transform
> feedback code sometimes needs to do 2 URB writes instead of 1, and an
> incorrect assertion in the gen6 URB setup needs to be fixed.
> 
> Patch 15 increases the value of MaxVarying from 16 to 32 for gen6+.
> 
> The series is available on branch "increase-max-varyings" of
> https://github.com/stereotype441/mesa.git.  I've piglit tested it on
> gen5, gen6, and gen7.

Do we have tests that use more than 16 varying vectors?  Some of the
generated varying packing tests, right?

> [PATCH 01/15] i965/fs: Expose "urb_setup" as part of brw_wm_prog_data.
> [PATCH 02/15] i965/fs: Change brw_wm_prog_data::urb_read_length to 
> num_varying_inputs.
> [PATCH 03/15] i965/fs: Consult brw_wm_prog_data::num_varying_inputs when 
> setting up WM state.
> [PATCH 04/15] i965/sf: Use BRW_SF_URB_ENTRY_READ_OFFSET rather than hardcoded 
> values.
> [PATCH 05/15] i965/sf: Consolidate common code for setting up gen6-7 
> attribute overrides.
> [PATCH 06/15] i965/sf: Consult brw_wm_prog_data when setting up SF/SBE state.
> [PATCH 07/15] i965/fs: Stop wasting input attribute space on gl_FragCoord and 
> gl_Frontfacing.
> [PATCH 08/15] i965/gen6+: Remove VUE map dependency on userclip_active.
> [PATCH 09/15] i965/gs: Stop storing an input VUE map in the GS program key.

Re: [Mesa-dev] [PATCH 18/21] glsl: Write a new built-in function module.

2013-09-09 Thread Matt Turner
On Wed, Sep 4, 2013 at 3:22 PM, Kenneth Graunke  wrote:
> +static bool
> +texture_query_lod(const _mesa_glsl_parse_state *state)
> +{
> +   return state->target == fragment_shader &&
> +  (state->is_version(400, 0) || state->ARB_texture_query_lod_enable);
> +}

Reminder about the whole fiasco about textureQueryLOD vs
textureQueryLod. Probably just need to remove the
state->is_version(400, 0) for now.

> +ir_function_signature *
> +builtin_builder::_modf(const glsl_type *type)
> +{
> +   ir_variable *x = in_var(type, "x");
> +   ir_variable *i = new(mem_ctx) ir_variable(type, "i", ir_var_function_out);
> +   MAKE_SIG(type, v130, 2, x, i);
> +
> +   ir_variable *t = body.make_temp(type, "t");
> +   body.emit(assign(t, expr(ir_unop_trunc, x)));
> +   body.emit(assign(i, t));
> +   body.emit(ret(sub(x, t)));
> +
> +   return sig;
> +}

Create an out_var() helper function and use it here.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Correctly validate fma()'s types.

2013-09-09 Thread Matt Turner
lrp() can take a scalar as a third argument, and fma() cannot.
---
 src/glsl/ir_validate.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 105f615..4898211 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -531,6 +531,12 @@ ir_validate::visit_leave(ir_expression *ir)
   break;
 
case ir_triop_fma:
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+  assert(ir->type == ir->operands[0]->type);
+  assert(ir->type == ir->operands[1]->type);
+  assert(ir->type == ir->operands[2]->type);
+  break;
+
case ir_triop_lrp:
   assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
   assert(ir->operands[0]->type == ir->operands[1]->type);
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 18/21] glsl: Write a new built-in function module.

2013-09-09 Thread Kenneth Graunke

On 09/09/2013 10:39 AM, Matt Turner wrote:

On Wed, Sep 4, 2013 at 3:22 PM, Kenneth Graunke  wrote:

+static bool
+texture_query_lod(const _mesa_glsl_parse_state *state)
+{
+   return state->target == fragment_shader &&
+  (state->is_version(400, 0) || state->ARB_texture_query_lod_enable);
+}


Reminder about the whole fiasco about textureQueryLOD vs
textureQueryLod. Probably just need to remove the
state->is_version(400, 0) for now.


Already done.  Thanks for pointing this out.


+ir_function_signature *
+builtin_builder::_modf(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *i = new(mem_ctx) ir_variable(type, "i", ir_var_function_out);
+   MAKE_SIG(type, v130, 2, x, i);
+
+   ir_variable *t = body.make_temp(type, "t");
+   body.emit(assign(t, expr(ir_unop_trunc, x)));
+   body.emit(assign(i, t));
+   body.emit(ret(sub(x, t)));
+
+   return sig;
+}


Create an out_var() helper function and use it here.


Sure.  I hadn't since there was only one instance of an out var being 
created, but apparently there are more coming in future built-ins.  Done.


--Ken

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/15] i965: Allow immediates to be folded into logical and shift instructions.

2013-09-09 Thread Matt Turner
Can I assume this has your R-b?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/15] i965: Allow immediates to be folded into logical and shift instructions.

2013-09-09 Thread Paul Berry
On 9 September 2013 11:23, Matt Turner  wrote:

> Can I assume this has your R-b?
>

Yes.  Sorry about that :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] glsl: Implement MESA_shader_integer_mix extension.

2013-09-09 Thread Ian Romanick
Two tiny edits, that just occurred to me, are below...

On 09/06/2013 07:57 PM, Matt Turner wrote:
> Because why doesn't GLSL allow you to do this already?
> ---
>  docs/specs/MESA_shader_integer_mix.spec | 125 
> 
>  src/glsl/builtin_functions.cpp  |  15 
>  src/glsl/glcpp/glcpp-parse.y|   3 +
>  src/glsl/glsl_parser_extras.cpp |   1 +
>  src/glsl/glsl_parser_extras.h   |   2 +
>  src/mesa/main/extensions.c  |   1 +
>  src/mesa/main/mtypes.h  |   3 +-
>  7 files changed, 149 insertions(+), 1 deletion(-)
>  create mode 100644 docs/specs/MESA_shader_integer_mix.spec
> 
> diff --git a/docs/specs/MESA_shader_integer_mix.spec 
> b/docs/specs/MESA_shader_integer_mix.spec
> new file mode 100644
> index 000..a789690
> --- /dev/null
> +++ b/docs/specs/MESA_shader_integer_mix.spec
> @@ -0,0 +1,125 @@
> +Name
> +
> +MESA_shader_integer_mix
> +
> +Name Strings
> +
> +GL_MESA_shader_integer_mix
> +
> +Contact
> +
> +Matt Turner (matt.turner 'at' intel.com)
> +
> +Contributors
> +
> +Matt Turner, Intel
> +
> +Status
> +
> +Shipping
> +
> +Version
> +
> +Last Modified Date: 09/06/2013
> +Author Revision:4
> +
> +Number
> +
> +
> +
> +Dependencies
> +
> +OpenGL 3.0 or OpenGL ES 3.0 is required.
> +

This extension interacts with GL_ARB_ES3_compatibility.

> +This extension is written against the OpenGL 4.4 (core) specification
> +and the GLSL 4.40 specification.
> +
> +Overview
> +
> +GLSL 1.30 (and GLSL ES 3.00) expanded the mix() built-in function to
> +operate on a boolean third argument that does not interpolate but
> +selects. This extension extends mix() to select between int, uint,
> +and bool components.
> +
> +New Procedures and Functions
> +
> +None.
> +
> +New Tokens
> +
> +None.
> +
> +Additions to Chapter 8 of the GLSL 4.40 Specification (Built-in Functions)
> +
> +Modify Section 8.3, Common Functions
> +
> +Additions to the table listing common built-in functions:
> +
> +  Syntax   Description
> +  ---  
> --
> +  genIType mix(genIType x, Selects which vector each returned 
> component comes
> +   genIType y, from. For a component of a that is false, 
> the
> +   genBType a) corresponding component of x is returned. 
> For a
> +  genUType mix(genUType x, component of a that is true, the 
> corresponding
> +   genUType y, component of y is returned.
> +   genBType a)
> +  genBType mix(genBType x,
> +   genBType y,
> +   genBType a)
> +
> +Additions to the AGL/GLX/WGL Specifications
> +
> +None.
> +
> +Modifications to The OpenGL Shading Language Specification, Version 4.40
> +
> +Including the following line in a shader can be used to control the
> +language features described in this extension:
> +
> +  #extension GL_MESA_shader_integer_mix : 
> +
> +where  is as specified in section 3.3.
> +
> +New preprocessor #defines are added to the OpenGL Shading Language:
> +
> +  #define GL_MESA_shader_integer_mix1
> +

Interactions with ARB_ES3_compatibility

On desktop implementations that support ARB_ES3_compatibility,
GL_MESA_shader_integer_mix can be enabled (and the new functions
used) in shaders declared with '#version 300 es'.

> +GLX Protocol
> +
> +None.
> +
> +Errors
> +
> +None.
> +
> +New State
> +
> +None.
> +
> +New Implementation Dependent State
> +
> +None.
> +
> +Issues
> +
> +1) Should we allow linear interpolation of integers via a non-boolean
> +   third component?
> +
> +RESOLVED: No.
> +
> +2) Should we mix() to select between boolean components?
> +
> +RESOLVED: Yes. Implementing the same functionality using casts would be
> +possible but ugly.
> +
> +Revision History
> +
> +Rev.Date  AuthorChanges
> +    -
> +  4   09/06/2013  mattst88  Allow extension on OpenGL ES 3.0.
> +
> +  3   08/28/2013  mattst88  Add #extension/#define changes.
> +
> +  2   08/26/2013  mattst88  Change vendor prefix to MESA. Add mix() that
> +selects between boolean components.
> +  1   08/26/2013  mattst88  Initial revision
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index 7b7bae6..5e697dc 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -747,6 +747,21 @@ builtin_builder::create_builtins()
>  _mix_sel(glsl_type::vec2_type,  glsl_type::bvec2_type),
>  _mix_sel(glsl_type::vec3_type,  glsl_type::bvec3_type),
>  _mix_sel(glsl_type::vec4_type,  glsl_type

Re: [Mesa-dev] [PATCH 05/21] glsl: Pass _mesa_glsl_parse_state into matching_signature and such.

2013-09-09 Thread Paul Berry
On 8 September 2013 19:49, Kenneth Graunke  wrote:

> On 09/08/2013 08:48 AM, Paul Berry wrote:
>
>> On 4 September 2013 15:22, Kenneth Graunke > **> wrote:
>>
>> During compilation, we'll use this to determine built-in availability.
>> All the prototypes will be available, but we can filter out the ones
>> that aren't actually available.
>>
>>
>> I'm having trouble understanding this sentence because it sounds like
>> you're using the word "available" to mean two different things.  Do you
>> perhaps mean something like "when looking up the name of a built-in
>> function, we'll iterate through a list of all the prototypes that exist
>> in all GLSL versions and extensions, but we'll filter out the ones that
>> aren't actually available in the current GLSL version with the currently
>> enabled extensions"?
>>
>
> Poor wording on my part; your understanding is basically correct.
>
> I've replaced it with this text:
> "The plan is to have a single shader containing every built-in in every
> version of the language, but filter out the ones that aren't actually
> available to the shader being compiled."
>
>
>  At link time, we don't actually need this filtering capability, since
>> we've already imported the prototypes and flagged them as
>> "is_builtin."
>>
>>
>> This sentence tripped me up a bit since the last patch removed the
>> "is_builtin" flag in favor of a function.  Do you mean to say that we've
>> already imported the prototypes and given them a non-null builtin_info
>> (so is_builtin() will return true)?
>>
>
> By "is_builtin flag" I meant "the value of the is_builtin() method."
>
>
>
>> The linker won't import additional prototypes, so it won't pull in any
>> unavailable built-ins.  Conversely, the is_builtin flag will prevent a
>> shader from defining its own prototype and fooling the linker to
>> import
>> a built-in's body.
>>
>>
>> Similarly, in this case do you mean "conversely, prototypes defined by
>> the shader will have builtin_info = null, so they will return false for
>> is_builtin() and that will prevent the linker from trying to import a
>> built-in's body for those functions"?
>>
>>   All that to say: we can just pass in NULL.  It'll be
>> fine.
>>
>
> Yes.  I've replaced this text with:
>
> "At link time, we don't actually need this filtering capability: we've
> already imported prototypes for every built-in that the shader actually
> calls, and they're flagged as is_builtin().  The linker doesn't import
> any additional prototypes, so it won't pull in any unavailable
> built-ins.  When resolving prototypes to function definitions, the
> linker ensures the values of is_builtin() match, which means that a
> shader can't trick the linker into importing the body of an unavailable
> built-in by defining a suspiciously similar prototype.
>
> In other words, during linking, we can just pass in NULL.  It will work
> out fine."
>

Ok, that makes sense.  Thanks for the clarification.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69148] New: configure does not accept --with-driver

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69148

  Priority: medium
Bug ID: 69148
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: configure does not accept --with-driver
  Severity: normal
Classification: Unclassified
OS: Linux (All)
  Reporter: genpfa...@gmail.com
  Hardware: Other
Status: NEW
   Version: 9.2
 Component: Mesa core
   Product: Mesa

The configure script does not accept --with-driver as indicated by the autoconf
documentation:

http://www.mesa3d.org/autoconf.html

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Kenneth Graunke
Otherwise, coordinates with four components would result in a MOV
with a destination writemask that has no channels enabled:

mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };

At best, this is stupid: we emit code that shouldn't do anything.
Worse, it apparently causes GPU hangs (observable with Chris's
textureGather test on CubeArrays.)

Signed-off-by: Kenneth Graunke 
Cc: Chris Forbes 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index ca52fd3..8cfe4e3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2245,8 +2245,10 @@ vec4_visitor::visit(ir_texture *ir)
 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
  coordinate));
   }
-  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
-  src_reg(0)));
+  if (zero_mask != 0) {
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+  src_reg(0)));
+  }
   /* Load the shadow comparitor */
   if (ir->shadow_comparitor && ir->op != ir_txd) {
 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Ian Romanick
On 09/05/2013 08:57 AM, Chia-I Wu wrote:
> On Thu, Sep 5, 2013 at 5:12 PM, Chris Forbes  wrote:
>> A possible explanation for the perf change is that Xonotic uses
>> anisotropic filtering at this quality level. Lowering to txl defeats
>> it.
> I had a look at that.  gl_sampler->MaxAnisotropy is never greater than
> 1.0 in gen7_update_sampler_state() so there is no anisotropic
> filtering in this case.
> 
> It makes sense to me that avoiding punting to SIMD8 helps the
> performance.  But it is not clear to me why >10% performance change
> can still be observed when INTEL_DEBUG=no16 is specified.  A
> reasonable explanation is that the image quality is degraded in some
> way, which is why I am still nervous about the change.
> 
> An alternative approach to avoid punting seems to emulate SIMD16
> sample_d with two SIMD8 sample_d.  It will take longer to implement
> given my familiarity with the code, and may be less performant.  BUt
> that would allow things like anisotropic filtering to be honored.
> 
> 
>> It would be worth doing an image quality comparison before and after the 
>> change.
> Yeah, that is worth doing.  I will do that.

Any results?  Still waiting...

>> -- Chris
>>
>> On Thu, Sep 5, 2013 at 8:35 PM, Chia-I Wu  wrote:
>>> sample_d is slower than the lowered version on gen7.  For gen7, this 
>>> improves
>>> Xonotic benchmark with Ultimate effects by as much as 25%:
>>>
>>>  before the change:  40.06 fps
>>>  after the change:   51.10 fps
>>>  after the change with INTEL_DEBUG=no16: 44.46 fps
>>>
>>> As sample_d is not allowed in SIMD16 mode, I firstly thought the difference
>>> was from SIMD8 versus SIMD16.  If that was the case, we would want to apply
>>> brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.
>>>
>>> But, as the numbers show, there is still 10% improvement when SIMD16 is 
>>> forced
>>> off after the change.  Thus textureGrad() is lowered unconditionally for 
>>> now.
>>> Due to this and that I haven't tried it on Haswell, this is still RFC.
>>>
>>> No piglit regressions.
>>>
>>> Signed-off-by: Chia-I Wu 
>>> ---
>>>  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
>>> ++
>>>  1 file changed, 36 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
>>> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>>> index 1589a20..f3fcb56 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>>> @@ -34,8 +34,8 @@ using namespace ir_builder;
>>>
>>>  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
>>>  public:
>>> -   lower_texture_grad_visitor(bool has_sample_d_c)
>>> -  : has_sample_d_c(has_sample_d_c)
>>> +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
>>> +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
>>> {
>>>progress = false;
>>> }
>>> @@ -44,6 +44,7 @@ public:
>>>
>>>
>>> bool progress;
>>> +   bool has_sample_d;
>>> bool has_sample_d_c;
>>>
>>>  private:
>>> @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
>>>  ir_visitor_status
>>>  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>>>  {
>>> -   /* Only lower textureGrad with shadow samplers */
>>> -   if (ir->op != ir_txd || !ir->shadow_comparitor)
>>> +   if (ir->op != ir_txd)
>>>return visit_continue;
>>>
>>> -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
>>> sample_d_c
>>> -* message.  GLSL provides gradients for the 'r' coordinate.  
>>> Unfortunately:
>>> -*
>>> -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message 
>>> description:
>>> -* "The r coordinate contains the faceid, and the r gradients are 
>>> ignored
>>> -*  by hardware."
>>> -*
>>> -* We likely need to do a similar treatment for samplerCube and
>>> -* samplerCubeArray, but we have insufficient testing for that at the 
>>> moment.
>>> -*/
>>> -   bool need_lowering = !has_sample_d_c ||
>>> -  ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
>>> +   bool need_lowering = false;
>>> +
>>> +   if (ir->shadow_comparitor) {
>>> +  /* Lower textureGrad() with samplerCubeShadow even if we have the
>>> +   * sample_d_c message.  GLSL provides gradients for the 'r' 
>>> coordinate.
>>> +   * Unfortunately:
>>> +   *
>>> +   * From the Ivybridge PRM, Volume 4, Part 1, sample_d message
>>> +   * description: "The r coordinate contains the faceid, and the r
>>> +   * gradients are ignored by hardware."
>>> +   */
>>> +  if (ir->sampler->type->sampler_dimensionality == 
>>> GLSL_SAMPLER_DIM_CUBE)
>>> + need_lowering = true;
>>> +  else if (!has_sample_d_c)
>>> + need_lowering = true;
>>> +   }
>>> +   else {
>>> +  /* We likely need to do a similar treatment for samplerCube and

Re: [Mesa-dev] [PATCH 9/9] i965/gs: implement EndPrimitive() functionality in the visitor.

2013-09-09 Thread Paul Berry
On 9 September 2013 09:18, Ian Romanick  wrote:

> On 09/09/2013 10:20 AM, Paul Berry wrote:
> > According to GLSL, the shader may call EndPrimitive() at any point
> > during its execution, causing the line or triangle strip currently
> > being output to be terminated and a new strip to be begun.
> >
> > This is implemented in gen7 hardware by using one control data bit per
> > vertex, to indicate whether EndPrimitive() was called after that
> > vertex was emitted.
> >
> > In order to make this work without sacrificing too much efficiency, we
> > accumulate 32 control data bits at a time in a GRF.  When we have
> > accumulated 32 bits (or when the shader terminates), we output them to
> > the appropriate DWORD in the control data header and reset the
> > accumulator to 0.
> >
> > We have to take special care to make sure that EndPrimitive() calls
> > that occur prior to the first vertex have no effect.
>
> Does this cover the general case of EndPrimitive without a preceeding
> EmitVertex?  Something like:
>
>  EmitVertex();
>  EndPrimitive();
>  EndPrimitive();
>
> And... do we have a test for that? :)
>

Yes, that should work fine.  What we do in EndPrimitive() is set the
appropriate bit in the bitfield, so calling EndPrimitive() again should
just redundantly set a bit that's already set.

I don't think we have a test for it.  I'll add that to my list.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Ian Romanick
On 09/06/2013 05:05 AM, Chia-I Wu wrote:
> On Thu, Sep 5, 2013 at 9:57 PM, Chia-I Wu  wrote:
>> On Thu, Sep 5, 2013 at 5:12 PM, Chris Forbes  wrote:
>>> A possible explanation for the perf change is that Xonotic uses
>>> anisotropic filtering at this quality level. Lowering to txl defeats
>>> it.
>> I had a look at that.  gl_sampler->MaxAnisotropy is never greater than
>> 1.0 in gen7_update_sampler_state() so there is no anisotropic
>> filtering in this case.
>>
>> It makes sense to me that avoiding punting to SIMD8 helps the
>> performance.  But it is not clear to me why >10% performance change
>> can still be observed when INTEL_DEBUG=no16 is specified.  A
>> reasonable explanation is that the image quality is degraded in some
>> way, which is why I am still nervous about the change.
> With INTEL_DEBUG=no16 set, the same trick hurts the performance on
> Haswell by about 5%.  That is, sample_d on Haswell is faster than the
> one emulated with sample_l.

What is the delta if sample_d is used for just SIMD8 shaders on HSW?
Even when the shader can go SIMD16, some fragments will use the SIMD8 path.

> But since the trick makes SIMD16 possible, it gains 5% more fps when
> INTEL_DEBUG=no16 is not set.
> 
>> An alternative approach to avoid punting seems to emulate SIMD16
>> sample_d with two SIMD8 sample_d.  It will take longer to implement
>> given my familiarity with the code, and may be less performant.  BUt
>> that would allow things like anisotropic filtering to be honored.
>>
>>
>>> It would be worth doing an image quality comparison before and after the 
>>> change.
>> Yeah, that is worth doing.  I will do that.
>>
>>>
>>> -- Chris
>>>
>>> On Thu, Sep 5, 2013 at 8:35 PM, Chia-I Wu  wrote:
 sample_d is slower than the lowered version on gen7.  For gen7, this 
 improves
 Xonotic benchmark with Ultimate effects by as much as 25%:

  before the change:  40.06 fps
  after the change:   51.10 fps
  after the change with INTEL_DEBUG=no16: 44.46 fps

 As sample_d is not allowed in SIMD16 mode, I firstly thought the difference
 was from SIMD8 versus SIMD16.  If that was the case, we would want to apply
 brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.

 But, as the numbers show, there is still 10% improvement when SIMD16 is 
 forced
 off after the change.  Thus textureGrad() is lowered unconditionally for 
 now.
 Due to this and that I haven't tried it on Haswell, this is still RFC.

 No piglit regressions.

 Signed-off-by: Chia-I Wu 
 ---
  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
 ++
  1 file changed, 36 insertions(+), 18 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
 b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 index 1589a20..f3fcb56 100644
 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 @@ -34,8 +34,8 @@ using namespace ir_builder;

  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
  public:
 -   lower_texture_grad_visitor(bool has_sample_d_c)
 -  : has_sample_d_c(has_sample_d_c)
 +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
 +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
 {
progress = false;
 }
 @@ -44,6 +44,7 @@ public:


 bool progress;
 +   bool has_sample_d;
 bool has_sample_d_c;

  private:
 @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
  ir_visitor_status
  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
  {
 -   /* Only lower textureGrad with shadow samplers */
 -   if (ir->op != ir_txd || !ir->shadow_comparitor)
 +   if (ir->op != ir_txd)
return visit_continue;

 -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
 sample_d_c
 -* message.  GLSL provides gradients for the 'r' coordinate.  
 Unfortunately:
 -*
 -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message 
 description:
 -* "The r coordinate contains the faceid, and the r gradients are 
 ignored
 -*  by hardware."
 -*
 -* We likely need to do a similar treatment for samplerCube and
 -* samplerCubeArray, but we have insufficient testing for that at the 
 moment.
 -*/
 -   bool need_lowering = !has_sample_d_c ||
 -  ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
 +   bool need_lowering = false;
 +
 +   if (ir->shadow_comparitor) {
 +  /* Lower textureGrad() with samplerCubeShadow even if we have the
 +   * sample_d_c message.  GLSL provides gradients for the 'r' 
 coordinate.

[Mesa-dev] [Bug 30279] corender broken on llvmpipe and swrast

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=30279

--- Comment #2 from David "okias" Heidelberger  ---
# LIBGL_ALWAYS_SOFTWARE=1 ./corender & sleep 1 ; ./corender dummy
[2] 21231
(null)Waiting for connection from another 'corender'
[1]   Exit 1  LIBGL_ALWAYS_SOFTWARE=1 ./corender
Got connection, sending windowID
Contacted first 'corender', getting WindowID
X Error of failed request:  BadDrawable (invalid Pixmap or Window parameter)
  Major opcode of failed request:  152 (DRI2)
  Minor opcode of failed request:  7 (DRI2GetBuffersWithFormat  )
  Resource id in failed request:  0x1e3
  Serial number of failed request:  372
  Current serial number in output stream:  372

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 30279] corender broken on llvmpipe and swrast

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=30279

David "okias" Heidelberger  changed:

   What|Removed |Added

   Hardware|Other   |x86-64 (AMD64)
 OS|All |Linux (All)
 CC||david.heidelber...@ixit.cz

--- Comment #3 from David "okias" Heidelberger  ---
with LIBGL_ALWAYS_SOFTWARE=1 is something displayed, but after moment it
disapears. Mesa 9.3, kernel 3.11.0, llvm 3.3.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Chris Forbes
Thanks Ken -- I should have spotted this but was being blind.

Does the GPU generally hang if the destination writemask has all
channels disabled? That sounds like something that could be worth
sanity-checking for in the backend.

Confirmed this fixes the hangs in the textureGather vs tests.

Reviewed-by: Chris Forbes 

On Tue, Sep 10, 2013 at 6:38 AM, Kenneth Graunke  wrote:
> Otherwise, coordinates with four components would result in a MOV
> with a destination writemask that has no channels enabled:
>
> mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };
>
> At best, this is stupid: we emit code that shouldn't do anything.
> Worse, it apparently causes GPU hangs (observable with Chris's
> textureGather test on CubeArrays.)
>
> Signed-off-by: Kenneth Graunke 
> Cc: Chris Forbes 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ca52fd3..8cfe4e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -2245,8 +2245,10 @@ vec4_visitor::visit(ir_texture *ir)
>  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
>   coordinate));
>}
> -  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
> -  src_reg(0)));
> +  if (zero_mask != 0) {
> + emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
> +  src_reg(0)));
> +  }
>/* Load the shadow comparitor */
>if (ir->shadow_comparitor && ir->op != ir_txd) {
>  emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
> --
> 1.8.3.4
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Ian Romanick
On 09/05/2013 03:35 AM, Chia-I Wu wrote:
> sample_d is slower than the lowered version on gen7.  For gen7, this improves
> Xonotic benchmark with Ultimate effects by as much as 25%:
> 
>  before the change:  40.06 fps
>  after the change:   51.10 fps
>  after the change with INTEL_DEBUG=no16: 44.46 fps
> 
> As sample_d is not allowed in SIMD16 mode, I firstly thought the difference
> was from SIMD8 versus SIMD16.  If that was the case, we would want to apply
> brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.
> 
> But, as the numbers show, there is still 10% improvement when SIMD16 is forced
> off after the change.  Thus textureGrad() is lowered unconditionally for now.
> Due to this and that I haven't tried it on Haswell, this is still RFC.

A lot of this code depends on the texture targets being used.  What
texture targets is Xonotic using with textureGrad?

> No piglit regressions.
> 
> Signed-off-by: Chia-I Wu 
> ---
>  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
> ++
>  1 file changed, 36 insertions(+), 18 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> index 1589a20..f3fcb56 100644
> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> @@ -34,8 +34,8 @@ using namespace ir_builder;
>  
>  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
>  public:
> -   lower_texture_grad_visitor(bool has_sample_d_c)
> -  : has_sample_d_c(has_sample_d_c)
> +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
> +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
> {
>progress = false;
> }
> @@ -44,6 +44,7 @@ public:
>  
>  
> bool progress;
> +   bool has_sample_d;
> bool has_sample_d_c;
>  
>  private:
> @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
>  ir_visitor_status
>  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>  {
> -   /* Only lower textureGrad with shadow samplers */
> -   if (ir->op != ir_txd || !ir->shadow_comparitor)
> +   if (ir->op != ir_txd)
>return visit_continue;
>  
> -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
> sample_d_c
> -* message.  GLSL provides gradients for the 'r' coordinate.  
> Unfortunately:
> -*
> -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
> -* "The r coordinate contains the faceid, and the r gradients are ignored
> -*  by hardware."
> -*
> -* We likely need to do a similar treatment for samplerCube and
> -* samplerCubeArray, but we have insufficient testing for that at the 
> moment.
> -*/
> -   bool need_lowering = !has_sample_d_c ||
> -  ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
> +   bool need_lowering = false;
> +
> +   if (ir->shadow_comparitor) {
> +  /* Lower textureGrad() with samplerCubeShadow even if we have the
> +   * sample_d_c message.  GLSL provides gradients for the 'r' coordinate.
> +   * Unfortunately:
> +   *
> +   * From the Ivybridge PRM, Volume 4, Part 1, sample_d message
> +   * description: "The r coordinate contains the faceid, and the r
> +   * gradients are ignored by hardware."
> +   */
> +  if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)
> + need_lowering = true;
> +  else if (!has_sample_d_c)
> + need_lowering = true;

This should look like the old code:

need_lowering = !has_sample_d_c ||
   ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;

> +   }
> +   else {
> +  /* We likely need to do a similar treatment for samplerCube and
> +   * samplerCubeArray, but we have insufficient testing for that at the
> +   * moment.
> +   */
> +  if (!has_sample_d)
> + need_lowering = true;

need_lowering = !has_sample_d;

> +   }
>  
> if (!need_lowering)
>return visit_continue;
> @@ -154,7 +166,9 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>  expr(ir_unop_sqrt, dot(dPdy, dPdy)));
> }
>  
> -   /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
> +   /* lambda_base = log2(rho).  It will be biased and clamped by values
> +* defined in SAMPLER_STATE to get the final lambda.
> +*/
> ir->op = ir_txl;
> ir->lod_info.lod = expr(ir_unop_log2, rho);
>  
> @@ -168,8 +182,12 @@ bool
>  brw_lower_texture_gradients(struct brw_context *brw,
>  struct exec_list *instructions)
>  {
> +   /* sample_d is slower than the lowered version on gen7, and is not allowed
> +* in SIMD16 mode.  Treating it as unsupported improves the performance.
> +*/
> +   bool has_sample_d = brw->gen != 7;

Based on the data in one of the oth

Re: [Mesa-dev] [PATCH 08/15] i965/gen6+: Remove VUE map dependency on userclip_active.

2013-09-09 Thread Ian Romanick
On 09/03/2013 06:18 PM, Paul Berry wrote:
> Previously, on Gen6+, we laid out the vertex (or geometry) shader VUE
> map differently depending whether user clipping was active.  If it was
> active, we put the clip distances in slots 2 and 3 (where the clipper
> expects them); if it was inactive, we assigned them in the order of
> the gl_varying_slot enum.
> 
> This made for unnecessary recompiles, since turning clipping on/off
> for a shader that used gl_ClipDistance might rearrange the varyings.
> It also required extra bookkeeping, since it required the user
> clipping flag to be provided to brw_compute_vue_map() as a parameter.
> 
> With this patch, we always put clip distances at in slots 2 and 3 if
> they are written to.  do_vs_prog() and do_gs_prog() are responsible
> for ensuring that clip distances are written to when user clipping is
> enabled (as do_vs_prog() previously did for gen4-5).
> 
> This makes the only input to brw_compute_vue_map() a bitfield of which
> varyings the shader writes to, a fact that we'll take advantage of in
> forthcoming patches.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h |  2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_gs.c | 15 ---
>  src/mesa/drivers/dri/i965/brw_vs.c  | 26 +-
>  3 files changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 167ed4a..0c1fd9e 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -436,7 +436,7 @@ static inline GLuint brw_varying_to_offset(struct 
> brw_vue_map *vue_map,
>  }
>  
>  void brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map 
> *vue_map,
> - GLbitfield64 slots_valid, bool userclip_active);
> + GLbitfield64 slots_valid);
>  
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 7ab03ac..94c4017 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -62,9 +62,18 @@ do_gs_prog(struct brw_context *brw,
> c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
> c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
> param_count);
>  
> -   brw_compute_vue_map(brw, &c.prog_data.base.vue_map,
> -   gp->program.Base.OutputsWritten,
> -   c.key.base.userclip_active);
> +   GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
> +
> +   /* In order for legacy clipping to work, we need to populate the clip
> +* distance varying slots whenever clipping is enabled, even if the vertex
> +* shader doesn't write to gl_ClipDistance.
> +*/
> +   if (c.key.base.userclip_active) {
> +  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
> +  outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
> +   }
> +
> +   brw_compute_vue_map(brw, &c.prog_data.base.vue_map, outputs_written);

Having both of the callers of brw_compute_vue_map do this same dance
before calling seems weird.  I want to understand this better... The VS
and GS code adds clip distance as a written output because fixed
function hardware is going to read it when user clipping is enabled.
This then gets captured in the vue_map... so when the next patch adds
another caller to brw_compute_vue_map, it doesn't need to know that user
clipping was enabled (vs. gl_ClipDistance being written).  Yeah?

> /* Compute the output vertex size.
>  *
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
> b/src/mesa/drivers/dri/i965/brw_vs.c
> index b81a538..6b97f01 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -52,14 +52,10 @@ static inline void assign_vue_slot(struct brw_vue_map 
> *vue_map,
>  
>  /**
>   * Compute the VUE map for vertex shader program.
> - *
> - * Note that consumers of this map using cache keys must include
> - * prog_data->userclip and prog_data->outputs_written in their key
> - * (generated by CACHE_NEW_VS_PROG).
>   */
>  void
>  brw_compute_vue_map(struct brw_context *brw, struct brw_vue_map *vue_map,
> -GLbitfield64 slots_valid, bool userclip_active)
> +GLbitfield64 slots_valid)
>  {
> vue_map->slots_valid = slots_valid;
> int i;
> @@ -107,10 +103,11 @@ brw_compute_vue_map(struct brw_context *brw, struct 
> brw_vue_map *vue_map,
> */
>assign_vue_slot(vue_map, VARYING_SLOT_PSIZ);
>assign_vue_slot(vue_map, VARYING_SLOT_POS);
> -  if (userclip_active) {
> +  if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0))
>   assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST0);
> +  if (slots_valid & BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))
>   assign_vue_slot(vue_map, VARYING_SLOT_CLIP_DIST1);
> -  }
> +
>/* front and back color

Re: [Mesa-dev] [PATCH 05/15] i965/sf: Consolidate common code for setting up gen6-7 attribute overrides.

2013-09-09 Thread Ian Romanick
On 09/03/2013 06:18 PM, Paul Berry wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_state.h |   9 +-
>  src/mesa/drivers/dri/i965/gen6_sf_state.c | 153 
> +-
>  src/mesa/drivers/dri/i965/gen7_sf_state.c |  64 +
>  3 files changed, 97 insertions(+), 129 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 22e4a61..dd3e216 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -223,9 +223,12 @@ void gen4_init_vtable_sampler_functions(struct 
> brw_context *brw);
>  void gen7_init_vtable_sampler_functions(struct brw_context *brw);
>  
>  /* gen6_sf_state.c */
> -uint32_t
> -get_attr_override(const struct brw_vue_map *vue_map, int 
> urb_entry_read_offset,
> -  int fs_attr, bool two_side_color, uint32_t 
> *max_source_attr);
> +void
> +calculate_attr_overrides(const struct brw_context *brw,
> + uint16_t *attr_overrides,
> + uint32_t *point_sprite_enables,
> + uint32_t *flat_enables,
> + uint32_t *urb_entry_read_length);
>  
>  /* brw_vs_surface_state.c */
>  void
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
> b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index dfe9a31..7094994 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -52,7 +52,7 @@
>   * the VUE that are not needed by the fragment shader.  It is measured in
>   * 256-bit increments.
>   */
> -uint32_t
> +static uint32_t
>  get_attr_override(const struct brw_vue_map *vue_map, int 
> urb_entry_read_offset,
>int fs_attr, bool two_side_color, uint32_t 
> *max_source_attr)
>  {
> @@ -123,21 +123,98 @@ get_attr_override(const struct brw_vue_map *vue_map, 
> int urb_entry_read_offset,
> return source_attr;
>  }
>  
> +
> +/**
> + * Create the mapping from the FS inputs we produce to the VS outputs they

Only VS outputs?

> + * source from.
> + */
> +void
> +calculate_attr_overrides(const struct brw_context *brw,
> + uint16_t *attr_overrides,
> + uint32_t *point_sprite_enables,
> + uint32_t *flat_enables,
> + uint32_t *urb_entry_read_length)
> +{
> +   const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
> +   uint32_t max_source_attr = 0;
> +   int input_index = 0;
> +
> +   /* _NEW_LIGHT */
> +   bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
> +
> +   for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
> +  enum glsl_interp_qualifier interp_qualifier =
> + brw->fragment_program->InterpQualifier[attr];
> +  bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == 
> VARYING_SLOT_COL1;
> +
> +  if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
> +  continue;
> +
> +  /* _NEW_POINT */
> +  if (brw->ctx.Point.PointSprite &&
> +   (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
> +   brw->ctx.Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
> +  *point_sprite_enables |= (1 << input_index);
> +  }
> +
> +  if (attr == VARYING_SLOT_PNTC)
> +  *point_sprite_enables |= (1 << input_index);
> +
> +  /* flat shading */
> +  if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
> +  (shade_model_flat && is_gl_Color &&
> +   interp_qualifier == INTERP_QUALIFIER_NONE))
> + *flat_enables |= (1 << input_index);
> +
> +  /* The hardware can only do the overrides on 16 overrides at a
> +   * time, and the other up to 16 have to be lined up so that the
> +   * input index = the output index.  We'll need to do some
> +   * tweaking to make sure that's the case.
> +   */
> +  assert(input_index < 16 || attr == input_index);
> +
> +  /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
> +  attr_overrides[input_index++] =
> + get_attr_override(&brw->vue_map_geom_out,
> +urb_entry_read_offset, attr,
> +   brw->ctx.VertexProgram._TwoSideEnabled,
> +   &max_source_attr);
> +   }
> +
> +   for (; input_index < VARYING_SLOT_MAX; input_index++)
> +  attr_overrides[input_index] = 0;
> +
> +   /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
> +* 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
> +*
> +* "This field should be set to the minimum length required to read the
> +*  maximum source attribute.  The maximum source attribute is indicated
> +*  by the maximum value of the enabled Attribute # Source Attribute if
> +*  Attribute Swizzle Enable is set, Number of Output Attributes-1 if
> +*  enable is not set.
> +*  read_length = ceiling((max_source_attr + 1) / 2)
> +*
> +*  [errata] Corruption/Hang p

Re: [Mesa-dev] [PATCH] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Ian Romanick
On 09/09/2013 03:35 PM, Ian Romanick wrote:
> On 09/09/2013 01:38 PM, Kenneth Graunke wrote:
>> Otherwise, coordinates with four components would result in a MOV
>> with a destination writemask that has no channels enabled:
>>
>> mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };
>>
>> At best, this is stupid: we emit code that shouldn't do anything.
>> Worse, it apparently causes GPU hangs (observable with Chris's
>> textureGather test on CubeArrays.)

I also meant to say:

Do we have something like ir_validate for GEN assembly?  It sure seems
like we should...

>> Signed-off-by: Kenneth Graunke 
>> Cc: Chris Forbes 
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 --
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
>> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> index ca52fd3..8cfe4e3 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
>> @@ -2245,8 +2245,10 @@ vec4_visitor::visit(ir_texture *ir)
>>   emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
>>coordinate));
>>}
>> -  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
>> -   src_reg(0)));
>> +  if (zero_mask != 0) {
>> + emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
>> +  src_reg(0)));
>> +  }
> 
> I'd be tempted to do this as:
> 
> if (coord_mask != 0x0f) {
>const int zero_mask = 0x0f & ~coord_mask;
> 
>emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
> src_reg(0)));
> }
> 
> And delete the silly zero_mask loop above.
> 
> I'm also not sure why coord_mask is calculated by a loop, but I can
> leave that bikeshed for another day...
> 
> Either way, the patch is
> 
> Reviewed-by: Ian Romanick 
> 
>>/* Load the shadow comparitor */
>>if (ir->shadow_comparitor && ir->op != ir_txd) {
>>   emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 62647] Wrong rendering of Dota 2 on Wine (apitrace attached) - Intel IVB HD4000

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=62647

--- Comment #33 from Auke Kok  ---
(In reply to comment #31)
> Can we have a new bug for issues in the native client?

This is now confirmed fixed with mesa-9.1.6.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Ian Romanick
On 09/09/2013 01:38 PM, Kenneth Graunke wrote:
> Otherwise, coordinates with four components would result in a MOV
> with a destination writemask that has no channels enabled:
> 
> mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };
> 
> At best, this is stupid: we emit code that shouldn't do anything.
> Worse, it apparently causes GPU hangs (observable with Chris's
> textureGather test on CubeArrays.)
> 
> Signed-off-by: Kenneth Graunke 
> Cc: Chris Forbes 
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ca52fd3..8cfe4e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -2245,8 +2245,10 @@ vec4_visitor::visit(ir_texture *ir)
>emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
> coordinate));
>}
> -  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
> -src_reg(0)));
> +  if (zero_mask != 0) {
> + emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
> +  src_reg(0)));
> +  }

I'd be tempted to do this as:

if (coord_mask != 0x0f) {
   const int zero_mask = 0x0f & ~coord_mask;

   emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
src_reg(0)));
}

And delete the silly zero_mask loop above.

I'm also not sure why coord_mask is calculated by a loop, but I can
leave that bikeshed for another day...

Either way, the patch is

Reviewed-by: Ian Romanick 

>/* Load the shadow comparitor */
>if (ir->shadow_comparitor && ir->op != ir_txd) {
>emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] R600: add a test for SI.tbuffer.store

2013-09-09 Thread Marek Olšák
Signed-off-by: Marek Olšák 
---
 test/CodeGen/R600/llvm.SI.tbuffer.store.ll | 40 ++
 1 file changed, 40 insertions(+)
 create mode 100644 test/CodeGen/R600/llvm.SI.tbuffer.store.ll

diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll 
b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
new file mode 100644
index 000..80b246c
--- /dev/null
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -0,0 +1,40 @@
+;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+;CHECK: TBUFFER_STORE_FORMAT_XYZW 
{{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 32, -1, 0, -1, 0, 14, 4, 
{{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
+define void @test1(i32 %a1, i32 %vaddr) {
+%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
+i32 1, i32 0)
+ret void
+}
+
+;CHECK: TBUFFER_STORE_FORMAT_XYZ 
{{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 24, -1, 0, -1, 0, 13, 4, 
{{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
+define void @test2(i32 %a1, i32 %vaddr) {
+%vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1,
+i32 1, i32 0)
+ret void
+}
+
+;CHECK: TBUFFER_STORE_FORMAT_XY {{VGPR[0-9]+_VGPR[0-9]+}}, 16, -1, 0, -1, 0, 
11, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
+define void @test3(i32 %a1, i32 %vaddr) {
+%vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
+call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
+i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1,
+i32 1, i32 0)
+ret void
+}
+
+;CHECK: TBUFFER_STORE_FORMAT_X {{VGPR[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, 
{{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0
+define void @test4(i32 %vdata, i32 %vaddr) {
+call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
+i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
+i32 1, i32 0)
+ret void
+}
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32)
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Correctly validate fma()'s types.

2013-09-09 Thread Kenneth Graunke

On 09/09/2013 11:23 AM, Matt Turner wrote:

lrp() can take a scalar as a third argument, and fma() cannot.
---
  src/glsl/ir_validate.cpp | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index 105f615..4898211 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -531,6 +531,12 @@ ir_validate::visit_leave(ir_expression *ir)
break;

 case ir_triop_fma:
+  assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+  assert(ir->type == ir->operands[0]->type);
+  assert(ir->type == ir->operands[1]->type);
+  assert(ir->type == ir->operands[2]->type);
+  break;
+
 case ir_triop_lrp:
assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
assert(ir->operands[0]->type == ir->operands[1]->type);



Always glad to see more validation.
Reviewed-by: Kenneth Graunke 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/9] i965/gs: Set control data header size/format appropriately for EndPrimitive().

2013-09-09 Thread Chris Forbes
In the commit message:

> Fortunately, OpenGL only requires separate streams to be supported
>when the output type is points, and EndPrimitive() only has an effect
> when the input type is line_strip or triangle_strip, so it's not a...

Shouldn't this say 'output type' ?

-- Chris

On Tue, Sep 10, 2013 at 3:20 AM, Paul Berry  wrote:
> The gen7 geometry shader uses a "control data header" at the beginning
> of the output URB entry to store either
>
> (a) flag bits (1 bit/vertex) indicating whether EndPrimitive() was
> called after each vertex, or
>
> (b) stream ID bits (2 bits/vertex) indicating which stream each vertex
> should be sent to (when multiple transform feedback streams are in
> use).
>
> Fortunately, OpenGL only requires separate streams to be supported
> when the output type is points, and EndPrimitive() only has an effect
> when the input type is line_strip or triangle_strip, so it's not a
> problem that these two uses of the control data header are mutually
> exclusive.
>
> This patch modifies do_vec4_gs_prog() to determine the correct
> hardware settings for configuring the control data header, and
> modifies upload_gs_state() to propagate these settings to the
> hardware.
>
> In addition, it modifies do_vec4_gs_prog() to ensure that the output
> URB entry is large enough to contain both the output vertices *and*
> the control data header.
>
> Finally, it modifies vec4_gs_visitor so that it accounts for the size
> of the control data header when computing the offset within the URB
> where output vertex data should be stored.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   | 14 ++
>  src/mesa/drivers/dri/i965/brw_defines.h   |  4 +++
>  src/mesa/drivers/dri/i965/brw_vec4_gs.c   | 33 
> +++
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  1 +
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  3 +++
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  2 +-
>  src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++
>  7 files changed, 60 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 57f086b..c566bba 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -548,6 +548,20 @@ struct brw_gs_prog_data
> unsigned output_vertex_size_hwords;
>
> unsigned output_topology;
> +
> +   /**
> +* Size of the control data (cut bits or StreamID bits), in hwords (32
> +* bytes).  0 if there is no control data.
> +*/
> +   unsigned control_data_header_size_hwords;
> +
> +   /**
> +* Format of the control data (either 
> GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> +* if the control data is StreamID bits, or
> +* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> +* Ignored if control_data_header_size is 0.
> +*/
> +   unsigned control_data_format;
>  };
>
>  /** Number of texture sampler units */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 0406c4d..6db2570 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1337,6 +1337,10 @@ enum brw_message_target {
>  /* DW5 */
>  # define GEN6_GS_MAX_THREADS_SHIFT 25
>  # define HSW_GS_MAX_THREADS_SHIFT  24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_SHIFT 24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
> +# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT20
>  # define GEN7_GS_DISPATCH_MODE_SINGLE  (0 << 11)
>  # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE   (1 << 11)
>  # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 7ab03ac..f67ae2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw,
> c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
> c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
> param_count);
>
> +   if (gp->program.OutputType == GL_POINTS) {
> +  /* When the output type is points, the geometry shader may output data
> +   * to multiple streams, and EndPrimitive() has no effect.  So we
> +   * configure the hardware to interpret the control data as stream ID.
> +   */
> +  c.prog_data.control_data_format = 
> GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
> +
> +  /* However, StreamID is not yet supported, so we output zero bits of
> +   * control data per vertex.
> +   */
> +  c.control_data_bits_per_vertex = 0;
> +   } else {
> +  /* When the output type is triangle_strip or line_strip, EndPrimitive(

[Mesa-dev] [PATCH 1/5] glsl: Add support for ldexp.

2013-09-09 Thread Matt Turner
v2: Drop frexp. Rebase on builtins rewrite.
Reviewed-by: Paul Berry  [v1]
---
 src/glsl/builtin_functions.cpp   | 14 ++
 src/glsl/ir.cpp  |  2 ++
 src/glsl/ir.h|  7 +++
 src/glsl/ir_constant_expression.cpp  | 10 ++
 src/glsl/ir_validate.cpp |  8 
 src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  4 
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   |  3 +++
 src/mesa/program/ir_to_mesa.cpp  |  1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  1 +
 10 files changed, 51 insertions(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index eca41aa..6e14f59 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -511,6 +511,7 @@ private:
B1(findLSB)
B1(findMSB)
B1(fma)
+   B2(ldexp)
 #undef B0
 #undef B1
 #undef B2
@@ -1820,6 +1821,13 @@ builtin_builder::create_builtins()
IU(findLSB)
IU(findMSB)
F(fma)
+
+   add_function("ldexp",
+_ldexp(glsl_type::float_type, glsl_type::int_type),
+_ldexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
+_ldexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
+_ldexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+NULL);
 #undef F
 #undef FI
 #undef FIU
@@ -3510,6 +3518,12 @@ builtin_builder::_fma(const glsl_type *type)
 
return sig;
 }
+
+ir_function_signature *
+builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type);
+}
 /** @} */
 
 
/**/
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index 1b17999..a2dca45 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -401,6 +401,7 @@ ir_expression::ir_expression(int op, ir_rvalue *op0, 
ir_rvalue *op1)
case ir_binop_lshift:
case ir_binop_rshift:
case ir_binop_bfm:
+   case ir_binop_ldexp:
   this->type = op0->type;
   break;
 
@@ -551,6 +552,7 @@ static const char *const operator_strs[] = {
"packHalf2x16_split",
"bfm",
"ubo_load",
+   "ldexp",
"vector_extract",
"fma",
"lrp",
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 2637b40..b0e7732 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1175,6 +1175,13 @@ enum ir_expression_operation {
ir_binop_ubo_load,
 
/**
+* \name Multiplies a number by two to a power, part of ARB_gpu_shader5.
+*/
+   /*@{*/
+   ir_binop_ldexp,
+   /*@}*/
+
+   /**
 * Extract a scalar from a vector
 *
 * operand0 is the vector
diff --git a/src/glsl/ir_constant_expression.cpp 
b/src/glsl/ir_constant_expression.cpp
index a67470b..4579ef2 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -394,6 +394,7 @@ ir_expression::constant_expression_value(struct hash_table 
*variable_context)
   switch (this->operation) {
   case ir_binop_lshift:
   case ir_binop_rshift:
+  case ir_binop_ldexp:
   case ir_binop_vector_extract:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
@@ -1376,6 +1377,15 @@ ir_expression::constant_expression_value(struct 
hash_table *variable_context)
   break;
}
 
+   case ir_binop_ldexp:
+  for (unsigned c = 0; c < components; c++) {
+ data.f[c] = ldexp(op[0]->value.f[c], op[1]->value.i[c]);
+ /* Flush subnormal values to zero. */
+ if (!isnormal(data.f[c]))
+data.f[c] = copysign(0.0, op[0]->value.f[c]);
+  }
+  break;
+
case ir_triop_fma:
   assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
   assert(op[1]->type->base_type == GLSL_TYPE_FLOAT);
diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp
index ae3f09d..66a9800 100644
--- a/src/glsl/ir_validate.cpp
+++ b/src/glsl/ir_validate.cpp
@@ -516,6 +516,14 @@ ir_validate::visit_leave(ir_expression *ir)
   assert(ir->operands[1]->type == glsl_type::uint_type);
   break;
 
+   case ir_binop_ldexp:
+  assert(ir->operands[0]->type == ir->type);
+  assert(ir->operands[0]->type->is_float());
+  assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT);
+  assert(ir->operands[0]->type->components() ==
+ ir->operands[1]->type->components());
+  break;
+
case ir_binop_vector_extract:
   assert(ir->operands[0]->type->is_vector());
   assert(ir->operands[1]->type->is_scalar()
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index b4c33e6..fb932d8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_chann

[Mesa-dev] [PATCH 2/5] glsl: Allow vectors to be created from ir_constant().

2013-09-09 Thread Matt Turner
Note the parameter name change in the int version of ir_constant, to
avoid the conflict with the loop iterator.

v2: Make analogous change to builtin_builder::imm().
Reviewed-by: Paul Berry  [v1]
---
 src/glsl/builtin_functions.cpp | 18 -
 src/glsl/ir.cpp| 44 +++---
 src/glsl/ir.h  |  8 
 3 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 6e14f59..dbd35f2 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -348,9 +348,9 @@ private:
 */
ir_variable *in_var(const glsl_type *type, const char *name);
ir_variable *out_var(const glsl_type *type, const char *name);
-   ir_constant *imm(float f);
-   ir_constant *imm(int i);
-   ir_constant *imm(unsigned u);
+   ir_constant *imm(float f, unsigned vector_elements=1);
+   ir_constant *imm(int i, unsigned vector_elements=1);
+   ir_constant *imm(unsigned u, unsigned vector_elements=1);
ir_constant *imm(const glsl_type *type, const ir_constant_data &);
ir_dereference_variable *var_ref(ir_variable *var);
ir_dereference_array *array_ref(ir_variable *var, int i);
@@ -1875,21 +1875,21 @@ builtin_builder::out_var(const glsl_type *type, const 
char *name)
 }
 
 ir_constant *
-builtin_builder::imm(float f)
+builtin_builder::imm(float f, unsigned vector_elements)
 {
-   return new(mem_ctx) ir_constant(f);
+   return new(mem_ctx) ir_constant(f, vector_elements);
 }
 
 ir_constant *
-builtin_builder::imm(int i)
+builtin_builder::imm(int i, unsigned vector_elements)
 {
-   return new(mem_ctx) ir_constant(i);
+   return new(mem_ctx) ir_constant(i, vector_elements);
 }
 
 ir_constant *
-builtin_builder::imm(unsigned u)
+builtin_builder::imm(unsigned u, unsigned vector_elements)
 {
-   return new(mem_ctx) ir_constant(u);
+   return new(mem_ctx) ir_constant(u, vector_elements);
 }
 
 ir_constant *
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index a2dca45..b0f92cb 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -619,42 +619,54 @@ ir_constant::ir_constant(const struct glsl_type *type,
memcpy(& this->value, data, sizeof(this->value));
 }
 
-ir_constant::ir_constant(float f)
+ir_constant::ir_constant(float f, unsigned vector_elements)
 {
+   assert(vector_elements <= 4);
this->ir_type = ir_type_constant;
-   this->type = glsl_type::float_type;
-   this->value.f[0] = f;
-   for (int i = 1; i < 16; i++)  {
+   this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1);
+   for (unsigned i = 0; i < vector_elements; i++) {
+  this->value.f[i] = f;
+   }
+   for (unsigned i = vector_elements; i < 16; i++)  {
   this->value.f[i] = 0;
}
 }
 
-ir_constant::ir_constant(unsigned int u)
+ir_constant::ir_constant(unsigned int u, unsigned vector_elements)
 {
+   assert(vector_elements <= 4);
this->ir_type = ir_type_constant;
-   this->type = glsl_type::uint_type;
-   this->value.u[0] = u;
-   for (int i = 1; i < 16; i++) {
+   this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1);
+   for (unsigned i = 0; i < vector_elements; i++) {
+  this->value.u[i] = u;
+   }
+   for (unsigned i = vector_elements; i < 16; i++) {
   this->value.u[i] = 0;
}
 }
 
-ir_constant::ir_constant(int i)
+ir_constant::ir_constant(int integer, unsigned vector_elements)
 {
+   assert(vector_elements <= 4);
this->ir_type = ir_type_constant;
-   this->type = glsl_type::int_type;
-   this->value.i[0] = i;
-   for (int i = 1; i < 16; i++) {
+   this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1);
+   for (unsigned i = 0; i < vector_elements; i++) {
+  this->value.i[i] = integer;
+   }
+   for (unsigned i = vector_elements; i < 16; i++) {
   this->value.i[i] = 0;
}
 }
 
-ir_constant::ir_constant(bool b)
+ir_constant::ir_constant(bool b, unsigned vector_elements)
 {
+   assert(vector_elements <= 4);
this->ir_type = ir_type_constant;
-   this->type = glsl_type::bool_type;
-   this->value.b[0] = b;
-   for (int i = 1; i < 16; i++) {
+   this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1);
+   for (unsigned i = 0; i < vector_elements; i++) {
+  this->value.b[i] = b;
+   }
+   for (unsigned i = vector_elements; i < 16; i++) {
   this->value.b[i] = false;
}
 }
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index b0e7732..6c5630b 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -1922,10 +1922,10 @@ union ir_constant_data {
 class ir_constant : public ir_rvalue {
 public:
ir_constant(const struct glsl_type *type, const ir_constant_data *data);
-   ir_constant(bool b);
-   ir_constant(unsigned int u);
-   ir_constant(int i);
-   ir_constant(float f);
+   ir_constant(bool b, unsigned vector_elements=1);
+   ir_constant(unsigned int u, unsigned vector_elements=1);
+   ir_constant(int i, unsigned vector_elements=1);
+   ir_constant(float f, unsigned vector_elements

[Mesa-dev] [PATCH 3/5] glsl: Add ldexp_to_arith lowering pass.

2013-09-09 Thread Matt Turner
Reviewed-by: Paul Berry 
---
 src/glsl/ir_optimization.h  |   1 +
 src/glsl/lower_instructions.cpp | 128 
 2 files changed, 129 insertions(+)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index b79c2b7..074686c 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -38,6 +38,7 @@
 #define INT_DIV_TO_MUL_RCP 0x40
 #define LRP_TO_ARITH   0x80
 #define BITFIELD_INSERT_TO_BFM_BFI 0x100
+#define LDEXP_TO_ARITH 0x200
 
 /**
  * \see class lower_packing_builtins_visitor
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index d32ec80..cb53048 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -37,6 +37,7 @@
  * - POW_TO_EXP2
  * - LOG_TO_LOG2
  * - MOD_TO_FRACT
+ * - LDEXP_TO_ARITH
  * - LRP_TO_ARITH
  * - BITFIELD_INSERT_TO_BFM_BFI
  *
@@ -82,6 +83,10 @@
  * if we have to break it down like this anyway, it gives an
  * opportunity to do things like constant fold the (1.0 / op1) easily.
  *
+ * LDEXP_TO_ARITH:
+ * -
+ * Converts ir_binop_ldexp to arithmetic and bit operations.
+ *
  * LRP_TO_ARITH:
  * -
  * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2).
@@ -125,6 +130,7 @@ private:
void log_to_log2(ir_expression *);
void lrp_to_arith(ir_expression *);
void bitfield_insert_to_bfm_bfi(ir_expression *);
+   void ldexp_to_arith(ir_expression *);
 };
 
 /**
@@ -332,6 +338,123 @@ 
lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir)
this->progress = true;
 }
 
+void
+lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
+{
+   /* Translates
+*ir_binop_ldexp x exp
+* into
+*
+*extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+*resulting_biased_exp = extracted_biased_exp + exp;
+*
+*if (resulting_biased_exp < 1) {
+*   return copysign(0.0, x);
+*}
+*
+*return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+*   lshift(i2u(resulting_biased_exp), exp_shift));
+*
+* which we can't actually implement as such, since the GLSL IR doesn't
+* have vectorized if-statements. We actually implement it without branches
+* using conditional-select:
+*
+*extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+*resulting_biased_exp = extracted_biased_exp + exp;
+*
+*is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
+*x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
+*resulting_biased_exp = csel(is_not_zero_or_underflow,
+*resulting_biased_exp, 0);
+*
+*return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+*   lshift(i2u(resulting_biased_exp), exp_shift));
+*/
+
+   const unsigned vec_elem = ir->type->vector_elements;
+
+   /* Types */
+   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 
1);
+
+   /* Constants */
+   ir_constant *zeroi = ir_constant::zero(ir, ivec);
+   ir_constant *zerof = ir_constant::zero(ir, ir->type);
+
+   ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fu, 
vec_elem);
+   ir_constant *sign_mask = new(ir) ir_constant(0x8000u, vec_elem);
+
+   ir_constant *exp_shift = new(ir) ir_constant(23u, vec_elem);
+
+   /* Temporary variables */
+   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
+   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
+
+   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
+  ir_var_temporary);
+
+   ir_variable *extracted_biased_exp =
+  new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
+   ir_variable *resulting_biased_exp =
+  new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
+
+   ir_variable *is_not_zero_or_underflow =
+  new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+
+   ir_instruction &i = *base_ir;
+
+   /* Copy  and  arguments. */
+   i.insert_before(x);
+   i.insert_before(assign(x, ir->operands[0]));
+   i.insert_before(exp);
+   i.insert_before(assign(exp, ir->operands[1]));
+
+   /* Extract the biased exponent from . */
+   i.insert_before(extracted_biased_exp);
+   i.insert_before(assign(extracted_biased_exp,
+  rshift(bitcast_f2i(abs(x)), exp_shift)));
+
+   i.insert_before(resulting_biased_exp);
+   i.insert_before(assign(resulting_biased_exp,
+  add(extracted_biased_exp, exp)));
+
+   /* Test if result is ±0.0, subnormal, or underflow by checking if the
+* resulting biased exponent would be less than 0x1. If so, the result is
+* 0.0 with the sign of x. (Actually, invert the conditions so that

[Mesa-dev] [PATCH 4/5] i965: Lower ldexp.

2013-09-09 Thread Matt Turner
v2: Drop frexp lowering.
Reviewed-by: Paul Berry  [v1]
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index e7dbdbe..abfa327 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -154,7 +154,8 @@ brw_link_shader(struct gl_context *ctx, struct 
gl_shader_program *shProg)
 EXP_TO_EXP2 |
 LOG_TO_LOG2 |
  bitfield_insert |
- lrp_to_arith);
+ lrp_to_arith |
+ LDEXP_TO_ARITH);
 
   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
* if-statements need to be flattened.
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] glsl: Add frexp signatures and implementation.

2013-09-09 Thread Matt Turner
I initially implemented frexp() as an IR opcode with a lowering pass,
but since it returns a value and has an out-parameter, it would break
assumptions our optimization passes make about ir_expressions being pure
(i.e., having no side effects).

For example, if opt_tree_grafting encounters this code:

uniform float u;
void main()
{
  int exp;
  float f = frexp(u, out exp);
  float g = float(exp)/256.0;
  float h = float(exp) + 1.0;
  gl_FragColor = vec4(f, g, h, g + h);
}

it may try to optimize it to this:

uniform float u;
void main()
{
  int exp;
  float g = float(exp)/256.0;
  float h = float(exp) + 1.0;
  gl_FragColor = vec4(frexp(u, out exp), g, h, g + h);
}

Some hardware has an instruction which performs frexp(), but we would
need some other compiler infrastructure to be able to generate it, such
as an intrinsics system that would allow backends to emit specific code
for particular bits of IR.
---
 src/glsl/builtin_functions.cpp | 54 ++
 1 file changed, 54 insertions(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index dbd35f2..e9d7b74 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -512,6 +512,7 @@ private:
B1(findMSB)
B1(fma)
B2(ldexp)
+   B2(frexp)
 #undef B0
 #undef B1
 #undef B2
@@ -1828,6 +1829,13 @@ builtin_builder::create_builtins()
 _ldexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
 _ldexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
 NULL);
+
+   add_function("frexp",
+_frexp(glsl_type::float_type, glsl_type::int_type),
+_frexp(glsl_type::vec2_type,  glsl_type::ivec2_type),
+_frexp(glsl_type::vec3_type,  glsl_type::ivec3_type),
+_frexp(glsl_type::vec4_type,  glsl_type::ivec4_type),
+NULL);
 #undef F
 #undef FI
 #undef FIU
@@ -3524,6 +3532,52 @@ builtin_builder::_ldexp(const glsl_type *x_type, const 
glsl_type *exp_type)
 {
return binop(ir_binop_ldexp, gpu_shader5, x_type, x_type, exp_type);
 }
+
+ir_function_signature *
+builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, gpu_shader5, 2, x, exponent);
+
+   const unsigned vec_elem = x_type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 
1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 
1);
+
+   /* Single-precision floating-point values are stored as
+*   1 sign bit;
+*   8 exponent bits;
+*   23 mantissa bits.
+*
+* An exponent shift of 23 will shift the mantissa out, leaving only the
+* exponent and sign bit (which itself may be zero, if the absolute value
+* was taken before the bitcast and shift.
+*/
+   ir_constant *exponent_shift = imm(23);
+   ir_constant *exponent_bias = imm(-126, vec_elem);
+
+   ir_constant *sign_mantissa_mask = imm(0x807fu, vec_elem);
+   ir_constant *exponent_mask = imm(0x3f00u, vec_elem);
+
+   ir_variable *is_not_zero = body.make_temp(bvec, "is_not_zero");
+   body.emit(assign(is_not_zero, nequal(abs(x), imm(0.0f, vec_elem;
+
+   /* Since abs(x) ensures that the sign bit is zero, we don't need to bitcast
+* to unsigned integers to ensure that 1 bits aren't shifted in.
+*/
+   body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
+   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
+ imm(0, vec_elem);
+
+   ir_variable *bits = body.make_temp(uvec, "bits");
+   body.emit(assign(bits, bitcast_f2u(x)));
+   body.emit(assign(bits, bit_and(bits, sign_mantissa_mask)));
+   body.emit(assign(bits, bit_or(bits, csel(is_not_zero, exponent_mask,
+imm(0u, vec_elem);
+   body.emit(ret(bitcast_u2f(bits)));
+
+   return sig;
+}
 /** @} */
 
 
/**/
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] Implement ldexp and frexp built-ins

2013-09-09 Thread Matt Turner
This is a respin of the previous series, rebased on Ken's built-in
rewrite. The first four patches original versions have reviews by Paul
and contain either trivial or no changes at all. Patch 5 is the third
rewrite of the frexp() implementation (1st was the lowering pass; 2nd
was an unpublished GLSL implementation that exposed bugs in the old
built-in code which prompted Ken to fix it all :)

Thanks,
Matt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69148] configure does not accept --with-driver

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69148

Matt Turner  changed:

   What|Removed |Added

 Status|NEW |ASSIGNED
   Assignee|mesa-dev@lists.freedesktop. |matts...@gmail.com
   |org |
 QA Contact||mesa-dev@lists.freedesktop.
   ||org

--- Comment #1 from Matt Turner  ---
The documentation should be updated. I'll send a patch.

To enable Xlib or OSMesa, use --enable-xlib-glx and --enable-osmesa
respectively. 

Also, ./configure --help will always be up to date.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Kenneth Graunke

On 09/09/2013 01:54 PM, Ian Romanick wrote:

On 09/09/2013 03:35 PM, Ian Romanick wrote:

On 09/09/2013 01:38 PM, Kenneth Graunke wrote:

Otherwise, coordinates with four components would result in a MOV
with a destination writemask that has no channels enabled:

mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };

At best, this is stupid: we emit code that shouldn't do anything.
Worse, it apparently causes GPU hangs (observable with Chris's
textureGather test on CubeArrays.)


I also meant to say:

Do we have something like ir_validate for GEN assembly?  It sure seems
like we should...


Not at present.  It would be nice to have validators for the VS and FS 
IR, to check constraints like r.reg_offset < virtual_grf_size[r.reg].


The brw_eu_emit layer does have a pile of assertions for various 
constraints, which is the closest thing we have.  Apparently this one 
slipped through.


--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] r600g: add support for separately allocated CMASKs

2013-09-09 Thread Grigori Goronzy

On 09.09.2013 16:09, Marek Olšák wrote:

 /* Check colorbuffers. */
 for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+   struct r600_texture *tex =
+   (struct 
r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+


Please check if cbufs[i] != NULL.



This seems to be wrong in a lot of places. It is a general issue and I 
think this should really be tackled in a separate commit. I'll fix up 
this part (to not regress the particular function), but leave it at that.


Introducing NULL checks everywhere is not particularly neat anyway. I 
wonder if there is a better way...


Best regards
Grigori

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] i965/vec4: Simplify the computation of coord_mask and zero_mask.

2013-09-09 Thread Kenneth Graunke
We can easily compute these without loops, resulting in simpler and
shorter code.

Signed-off-by: Kenneth Graunke 
Cc: Ian Romanick 
Cc: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 28dc313..a51b61b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2221,13 +2221,10 @@ vec4_visitor::visit(ir_texture *ir)
   int writemask = brw->gen == 4 ? WRITEMASK_W : WRITEMASK_X;
   emit(MOV(dst_reg(MRF, param_base, lod_type, writemask), lod));
} else {
-  int i, coord_mask = 0, zero_mask = 0;
   /* Load the coordinate */
   /* FINISHME: gl_clamp_mask and saturate */
-  for (i = 0; i < ir->coordinate->type->vector_elements; i++)
-coord_mask |= (1 << i);
-  for (; i < 4; i++)
-zero_mask |= (1 << i);
+  int coord_mask = (1 << ir->coordinate->type->vector_elements) - 1;
+  int zero_mask = 0xf & ~coord_mask;
 
   if (ir->offset && ir->op == ir_txf) {
 /* It appears that the ld instruction used for txf does its
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] i965/vec4: Only zero out unused message components when there are any.

2013-09-09 Thread Kenneth Graunke
Otherwise, coordinates with four components would result in a MOV
with a destination writemask that has no channels enabled:

mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };

At best, this is stupid: we emit code that shouldn't do anything.
Worse, it apparently causes GPU hangs (observable with Chris's
textureGather test on CubeArrays.)

Signed-off-by: Kenneth Graunke 
Reviewed-by: Chris Forbes 
Reviewed-by: Ian Romanick 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a51b61b..86ecd21 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2248,8 +2248,10 @@ vec4_visitor::visit(ir_texture *ir)
 emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, coord_mask),
  coordinate));
   }
-  emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
-  src_reg(0)));
+  if (zero_mask != 0) {
+ emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
+  src_reg(0)));
+  }
   /* Load the shadow comparitor */
   if (ir->shadow_comparitor && ir->op != ir_txd) {
 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: Add an assertion that writemask != NULL for non-ARFs.

2013-09-09 Thread Kenneth Graunke
We've observed GPU hangs on Ivybridge from the following instruction:

mov(8) g115<1>.F 0D { align16 WE_normal NoDDChk 1Q };

There should be no reason to ever set the writemask on a destination
register to zero, except for perhaps the ARF NULL register.

This patch adds an assertion to enforce this for non-ARF registers.
Excluding ARFs is conservative yet should still catch the majority
of mistakes.

Signed-off-by: Kenneth Graunke 
Cc: Ian Romanick 
Cc: Chris Forbes 
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index f26c913..36c494e 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -126,6 +126,8 @@ brw_set_dest(struct brw_compile *p, struct brw_instruction 
*insn,
   else {
 insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
 insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ assert(dest.dw1.bits.writemask != 0 ||
+dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
  *Although Dst.HorzStride is a don't care for Align16, HW needs
  *this to be programmed as "01".
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Implement MESA_shader_integer_mix extension.

2013-09-09 Thread Matt Turner
Because why doesn't GLSL allow you to do this already?

Reviewed-by: Kenneth Graunke 
---
 docs/specs/MESA_shader_integer_mix.spec | 135 
 src/glsl/builtin_functions.cpp  |  39 +++--
 src/glsl/glcpp/glcpp-parse.y|   3 +
 src/glsl/glsl_parser_extras.cpp |   1 +
 src/glsl/glsl_parser_extras.h   |   2 +
 src/mesa/main/extensions.c  |   1 +
 src/mesa/main/mtypes.h  |   3 +-
 7 files changed, 176 insertions(+), 8 deletions(-)
 create mode 100644 docs/specs/MESA_shader_integer_mix.spec

diff --git a/docs/specs/MESA_shader_integer_mix.spec 
b/docs/specs/MESA_shader_integer_mix.spec
new file mode 100644
index 000..d381ddd
--- /dev/null
+++ b/docs/specs/MESA_shader_integer_mix.spec
@@ -0,0 +1,135 @@
+Name
+
+MESA_shader_integer_mix
+
+Name Strings
+
+GL_MESA_shader_integer_mix
+
+Contact
+
+Matt Turner (matt.turner 'at' intel.com)
+
+Contributors
+
+Matt Turner, Intel
+Ian Romanick, Intel
+
+Status
+
+Shipping
+
+Version
+
+Last Modified Date: 09/09/2013
+Author Revision:5
+
+Number
+
+
+
+Dependencies
+
+OpenGL 3.0 or OpenGL ES 3.0 is required. This extension interacts with
+GL_ARB_ES3_compatibility.
+
+This extension is written against the OpenGL 4.4 (core) specification
+and the GLSL 4.40 specification.
+
+Overview
+
+GLSL 1.30 (and GLSL ES 3.00) expanded the mix() built-in function to
+operate on a boolean third argument that does not interpolate but
+selects. This extension extends mix() to select between int, uint,
+and bool components.
+
+New Procedures and Functions
+
+None.
+
+New Tokens
+
+None.
+
+Additions to Chapter 8 of the GLSL 4.40 Specification (Built-in Functions)
+
+Modify Section 8.3, Common Functions
+
+Additions to the table listing common built-in functions:
+
+  Syntax   Description
+  ---  
--
+  genIType mix(genIType x, Selects which vector each returned 
component comes
+   genIType y, from. For a component of a that is false, 
the
+   genBType a) corresponding component of x is returned. 
For a
+  genUType mix(genUType x, component of a that is true, the 
corresponding
+   genUType y, component of y is returned.
+   genBType a)
+  genBType mix(genBType x,
+   genBType y,
+   genBType a)
+
+Additions to the AGL/GLX/WGL Specifications
+
+None.
+
+Modifications to The OpenGL Shading Language Specification, Version 4.40
+
+Including the following line in a shader can be used to control the
+language features described in this extension:
+
+  #extension GL_MESA_shader_integer_mix : 
+
+where  is as specified in section 3.3.
+
+New preprocessor #defines are added to the OpenGL Shading Language:
+
+  #define GL_MESA_shader_integer_mix1
+
+Interactions with ARB_ES3_compatibility
+
+On desktop implementations that support ARB_ES3_compatibility,
+GL_MESA_shader_integer_mix can be enabled (and the new functions
+used) in shaders declared with '#version 300 es'.
+
+GLX Protocol
+
+None.
+
+Errors
+
+None.
+
+New State
+
+None.
+
+New Implementation Dependent State
+
+None.
+
+Issues
+
+1) Should we allow linear interpolation of integers via a non-boolean
+   third component?
+
+RESOLVED: No.
+
+2) Should we allow mix() to select between boolean components?
+
+RESOLVED: Yes. Implementing the same functionality using casts would be
+possible but ugly.
+
+Revision History
+
+Rev.Date  AuthorChanges
+    -
+  5   09/09/2013  idr   Add ARB_ES3_compatibility interaction.
+
+  4   09/06/2013  mattst88  Allow extension on OpenGL ES 3.0.
+
+  3   08/28/2013  mattst88  Add #extension/#define changes.
+
+  2   08/26/2013  mattst88  Change vendor prefix to MESA. Add mix() that
+selects between boolean components.
+  1   08/26/2013  mattst88  Initial revision
diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 7b7bae6..cc5e2a7 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -178,6 +178,12 @@ shader_bit_encoding(const _mesa_glsl_parse_state *state)
 }
 
 static bool
+shader_integer_mix(const _mesa_glsl_parse_state *state)
+{
+   return v130(state) && state->MESA_shader_integer_mix_enable;
+}
+
+static bool
 shader_packing(const _mesa_glsl_parse_state *state)
 {
return state->ARB_shading_language_packing_enable ||
@@ -391,7 +397,9 @@ private:
B2(max)
B2(clamp)
B2(mix_lrp)
-   B2(mix_sel)
+   ir_function_signature *_mix_sel(builtin_available_predicate avail,
+   

[Mesa-dev] [PATCH 2/2] glsl: fix variadic macro for MSVC

2013-09-09 Thread Brian Paul
MSVC doesn't accept the rest... syntax.
---
 src/glsl/builtin_functions.cpp |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index eca41aa..5d8f171 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -1935,9 +1935,9 @@ builtin_builder::new_sig(const glsl_type *return_type,
return sig;
 }
 
-#define MAKE_SIG(return_type, avail, rest...) \
+#define MAKE_SIG(return_type, avail, ...)  \
ir_function_signature *sig =   \
-  new_sig(return_type, avail, rest);  \
+  new_sig(return_type, avail, __VA_ARGS__);  \
ir_factory body(&sig->body, mem_ctx);
 
 ir_function_signature *
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] glsl: remove struct keyword from ir_variable declarations

2013-09-09 Thread Brian Paul
To silence MSVC warnings.
---
 src/glsl/opt_dead_builtin_varyings.cpp |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/glsl/opt_dead_builtin_varyings.cpp 
b/src/glsl/opt_dead_builtin_varyings.cpp
index 6745d5c..3cdd130 100644
--- a/src/glsl/opt_dead_builtin_varyings.cpp
+++ b/src/glsl/opt_dead_builtin_varyings.cpp
@@ -391,10 +391,10 @@ public:
 
 private:
const varying_info_visitor *info;
-   struct ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
-   struct ir_variable *new_color[2];
-   struct ir_variable *new_backcolor[2];
-   struct ir_variable *new_fog;
+   ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS];
+   ir_variable *new_color[2];
+   ir_variable *new_backcolor[2];
+   ir_variable *new_fog;
 };
 
 
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] docs: Clean up autoconf.html.

2013-09-09 Thread Matt Turner
Remove long dead options and clarify some things.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=69148
---
 docs/autoconf.html | 37 ++---
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/docs/autoconf.html b/docs/autoconf.html
index a07a3ee..d4e8a35 100644
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -123,24 +123,6 @@ directories.
 There are also a few general options for altering the Mesa build:
 
 
---with-x
-When the X11 development libraries are
-needed, the pkg-config utility will
-be used for locating them. If they cannot be found through
-pkg-config a fallback routing using imake will
-be used. In this case, the --with-x,
---x-includes and --x-libraries options can
-control the use of X for Mesa.
-
-
---enable-gl-osmesa
-The OSMesa
-library can be built on top of libGL for drivers that provide it.
-This option controls whether to build libOSMesa. By default, this is
-enabled for the Xlib driver and disabled otherwise. Note that this
-option is different than using OSMesa as the driver.
-
-
 --enable-debug
 This option will enable compiler
 options and macros to aid in debugging the Mesa libraries.
@@ -155,12 +137,12 @@ assembly will not be used.
 
 --enable-32-bit
 --enable-64-bit
-By default, the
-build will compile code as directed by the environment variables
+By default, the build will compile code as directed by the environment
+variables
 CC, CFLAGS, etc. If the compiler is
 gcc, these options offer a helper to add the compiler flags
 to force 32- or 64-bit code generation as used on the x86 and x86_64
-architectures.
+architectures. Note that these options are mutually exclusive.
 
 
 
@@ -171,19 +153,19 @@ architectures.
 There are several different driver modes that Mesa can use. These are
 described in more detail in the basic
 installation instructions. The Mesa driver is controlled through the
-configure option --with-driver. There are currently three supported
-options in the configure script.
+configure options --enable-xlib-glx, --enable-osmesa,
+and --enable-dri.
 
 
-XlibThis is the default mode for building Mesa.
+Xlib
 It uses Xlib as a software renderer to do all rendering. It corresponds
-to the option --with-driver=xlib. The libX11 and libXext
+to the option --enable-xlib-glx. The libX11 and libXext
 libraries, as well as the X11 development headers, will be need to
 support the Xlib driver.
 
 DRIThis mode uses the DRI hardware drivers for
 accelerated OpenGL rendering. Enable the DRI drivers with the option
---with-driver=dri. See the basic
+--enable-dri. See the basic
 installation instructions for details on prerequisites for the DRI
 drivers.
 
@@ -223,7 +205,8 @@ and /usr/local/lib, respectively.
 OSMesa  No libGL is built in this
 mode. Instead, the driver code is built into the Off-Screen Mesa
 (OSMesa) library. See the Off-Screen Rendering
-page for more details.
+page for more details.  It corresponds to the option
+--enable-osmesa.
 
 
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 69148] configure does not accept --with-driver

2013-09-09 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=69148

--- Comment #2 from Matt Turner  ---
Patch sent.

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/9] i965/gs: Add a state atom to set up geometry shader state.

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:20 AM, Paul Berry wrote:
> v2: Do not attempt to share the code that uploads
> 3DSTATE_BINDING_TABLE_POINTERS_GS, 3DSTATE_SAMPLER_STATE_POINTERS_GS,
> or 3DSTATE_GS with VS.
> ---
>  src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
>  src/mesa/drivers/dri/i965/brw_defines.h  |   7 ++
>  src/mesa/drivers/dri/i965/brw_state.h|   2 +
>  src/mesa/drivers/dri/i965/brw_state_upload.c |   2 +
>  src/mesa/drivers/dri/i965/gen7_disable.c |  33 --
>  src/mesa/drivers/dri/i965/gen7_gs_state.c| 144 
> +++
>  6 files changed, 156 insertions(+), 33 deletions(-)
>  create mode 100644 src/mesa/drivers/dri/i965/gen7_gs_state.c
> 
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
> b/src/mesa/drivers/dri/i965/Makefile.sources
> index 5299d0d..07c1053 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -122,6 +122,7 @@ i965_FILES = \
>   gen7_blorp.cpp \
>   gen7_clip_state.c \
>   gen7_disable.c \
> +gen7_gs_state.c \
>   gen7_misc_state.c \
>   gen7_sampler_state.c \
>   gen7_sf_state.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index d5a12f1..0406c4d 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1328,15 +1328,22 @@ enum brw_message_target {
>  # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754(0 << 16)
>  # define GEN6_GS_FLOATING_POINT_MODE_ALT (1 << 16)
>  /* DW4 */
> +# define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT23
> +# define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT   17
>  # define GEN6_GS_URB_READ_LENGTH_SHIFT   11
>  # define GEN7_GS_INCLUDE_VERTEX_HANDLES  (1 << 10)
>  # define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT 4
>  # define GEN6_GS_DISPATCH_START_GRF_SHIFT0
>  /* DW5 */
>  # define GEN6_GS_MAX_THREADS_SHIFT   25
> +# define HSW_GS_MAX_THREADS_SHIFT24
> +# define GEN7_GS_DISPATCH_MODE_SINGLE(0 << 11)
> +# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
> +# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT   (2 << 11)
>  # define GEN6_GS_STATISTICS_ENABLE   (1 << 10)
>  # define GEN6_GS_SO_STATISTICS_ENABLE(1 << 9)
>  # define GEN6_GS_RENDERING_ENABLE(1 << 8)
> +# define GEN7_GS_INCLUDE_PRIMITIVE_ID(1 << 4)
>  # define GEN7_GS_ENABLE  (1 << 0)
>  /* DW6 */
>  # define GEN6_GS_REORDER (1 << 30)
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 4c4a536..04c1a97 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -116,6 +116,8 @@ extern const struct brw_tracked_state gen7_depthbuffer;
>  extern const struct brw_tracked_state gen7_cc_viewport_state_pointer;
>  extern const struct brw_tracked_state gen7_clip_state;
>  extern const struct brw_tracked_state gen7_disable_stages;
> +extern const struct brw_tracked_state gen7_gs_push_constants;
> +extern const struct brw_tracked_state gen7_gs_state;
>  extern const struct brw_tracked_state gen7_ps_state;
>  extern const struct brw_tracked_state gen7_push_constant_space;
>  extern const struct brw_tracked_state gen7_sbe_state;
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
> b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index b6a6a0a..8f21f06 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -196,6 +196,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
> &gen6_depth_stencil_state,/* must do before cc unit */
>  
> &gen6_vs_push_constants, /* Before vs_state */
> +   &gen7_gs_push_constants, /* Before gs_state */
> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
>  
> /* Surface state setup.  Must come before the VS/WM unit.  The binding
> @@ -220,6 +221,7 @@ static const struct brw_tracked_state *gen7_atoms[] =
>  
> &gen7_disable_stages,
> &gen7_vs_state,
> +   &gen7_gs_state,
> &gen7_sol_state,
> &gen7_clip_state,
> &gen7_sbe_state,
> diff --git a/src/mesa/drivers/dri/i965/gen7_disable.c 
> b/src/mesa/drivers/dri/i965/gen7_disable.c
> index 860aa95..98d115b 100644
> --- a/src/mesa/drivers/dri/i965/gen7_disable.c
> +++ b/src/mesa/drivers/dri/i965/gen7_disable.c
> @@ -29,39 +29,6 @@
>  static void
>  disable_stages(struct brw_context *brw)
>  {
> -   assert(!brw->ff_gs.prog_active);
> -
> -   /* Disable the Geometry Shader (GS) Unit */
> -   BEGIN_BATCH(7);
> -   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
> -   OUT_BATCH(0);
> -   OUT_BATCH(0);
> -   OUT_BATCH(0);
> -   OUT_BATCH(0);
> -   OUT_BATCH(0)

Re: [Mesa-dev] [PATCH 2/2] glsl: fix variadic macro for MSVC

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 04:03 PM, Brian Paul wrote:
> MSVC doesn't accept the rest... syntax.
> ---
>  src/glsl/builtin_functions.cpp |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index eca41aa..5d8f171 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -1935,9 +1935,9 @@ builtin_builder::new_sig(const glsl_type *return_type,
> return sig;
>  }
>  
> -#define MAKE_SIG(return_type, avail, rest...) \
> +#define MAKE_SIG(return_type, avail, ...)  \
> ir_function_signature *sig =   \
> -  new_sig(return_type, avail, rest);  \
> +  new_sig(return_type, avail, __VA_ARGS__);  \
> ir_factory body(&sig->body, mem_ctx);
>  
>  ir_function_signature *

I always forget about this.  Sorry, Brian.

Both patches are:
Reviewed-by: Kenneth Graunke 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/9] glsl: During linking, record whether a GS uses EndPrimitive().

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:20 AM, Paul Berry wrote:
> This information will be useful in the i965 back end, since we can
> save some compilation effort if we know from the outset that the
> shader never calls EndPrimitive().
> ---
>  src/glsl/linker.cpp   | 31 +++
>  src/mesa/main/mtypes.h|  2 ++
>  src/mesa/main/shaderapi.c |  1 +
>  3 files changed, 34 insertions(+)
> 
> diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp
> index 8430096..f10303e 100644
> --- a/src/glsl/linker.cpp
> +++ b/src/glsl/linker.cpp
> @@ -249,6 +249,33 @@ public:
>  };
>  
>  
> +/**
> + * Visitor that determines whether or not a shader uses ir_end_primitive.
> + */
> +class find_end_primitive_visitor : public ir_hierarchical_visitor {
> +public:
> +   find_end_primitive_visitor()
> +  : found(false)
> +   {
> +  /* empty */
> +   }
> +
> +   virtual ir_visitor_status visit(ir_end_primitive *)
> +   {
> +  found = true;
> +  return visit_stop;
> +   }
> +
> +   bool end_primitive_found()
> +   {
> +  return found;
> +   }
> +
> +private:
> +   bool found;
> +};
> +
> +
>  void
>  linker_error(gl_shader_program *prog, const char *fmt, ...)
>  {
> @@ -517,6 +544,10 @@ validate_geometry_shader_executable(struct 
> gl_shader_program *prog,
>  
> analyze_clip_usage("geometry", prog, shader, &prog->Geom.UsesClipDistance,
>&prog->Geom.ClipDistanceArraySize);
> +
> +   find_end_primitive_visitor end_primitive;
> +   end_primitive.run(shader->ir);
> +   prog->Geom.UsesEndPrimitive = end_primitive.end_primitive_found();
>  }
>  
>  
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index ef16c59..18a6f66 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -1931,6 +1931,7 @@ struct gl_geometry_program
> GL_TRIANGLES, or GL_TRIANGLES_ADJACENCY_ARB */
> GLenum OutputType; /**< GL_POINTS, GL_LINE_STRIP or GL_TRIANGLE_STRIP */
> GLboolean UsesClipDistance;
> +   GLboolean UsesEndPrimitive;
>  };
>  
>  
> @@ -2364,6 +2365,7 @@ struct gl_shader_program
>GLboolean UsesClipDistance;
>GLuint ClipDistanceArraySize; /**< Size of the gl_ClipDistance array, 
> or
>   0 if not present. */
> +  GLboolean UsesEndPrimitive;
> } Geom;
>  
> /** Vertex shader state */
> diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c
> index 4fe9d9c..a2386fb 100644
> --- a/src/mesa/main/shaderapi.c
> +++ b/src/mesa/main/shaderapi.c
> @@ -1872,6 +1872,7 @@ _mesa_copy_linked_program_data(gl_shader_type type,
>dst_gp->InputType = src->Geom.InputType;
>dst_gp->OutputType = src->Geom.OutputType;
>dst_gp->UsesClipDistance = src->Geom.UsesClipDistance;
> +  dst_gp->UsesEndPrimitive = src->Geom.UsesEndPrimitive;
> }
>break;
> default:
> 

We set some similar flags in ir_set_program_inouts.  Specifically, I'm
thinking about UsesDFdy.  someday, it might be nice to consolidate all
the "Does the shader use X?" flags in one place.

I'm fine with your code as is, though.  Progress.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] glsl: fix variadic macro for MSVC

2013-09-09 Thread Brian Paul

On 09/09/2013 05:31 PM, Kenneth Graunke wrote:

On 09/09/2013 04:03 PM, Brian Paul wrote:

MSVC doesn't accept the rest... syntax.
---
  src/glsl/builtin_functions.cpp |4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index eca41aa..5d8f171 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -1935,9 +1935,9 @@ builtin_builder::new_sig(const glsl_type *return_type,
 return sig;
  }

-#define MAKE_SIG(return_type, avail, rest...) \
+#define MAKE_SIG(return_type, avail, ...)  \
 ir_function_signature *sig =   \
-  new_sig(return_type, avail, rest);  \
+  new_sig(return_type, avail, __VA_ARGS__);  \
 ir_factory body(&sig->body, mem_ctx);

  ir_function_signature *


I always forget about this.  Sorry, Brian.


No problem.



Both patches are:
Reviewed-by: Kenneth Graunke 


Thanks.

-Brian


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Ensure gl_sync_object is fully initialized.

2013-09-09 Thread Vinson Lee
278372b47e4db8a022d57f60302eec74819e9341 added the uninitialized pointer
field gl_sync_object:Label. A free of this pointer, added in commit
6d8dd59cf53d2f47b817d79204a52bb3a46e8c77, resulted in a crash.

This patch fixes piglit ARB_sync regressions with swrast introduced by
6d8dd59cf53d2f47b817d79204a52bb3a46e8c77.

Signed-off-by: Vinson Lee 
---
 src/mesa/main/syncobj.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/syncobj.c b/src/mesa/main/syncobj.c
index 92c7cb0..987d4f5 100644
--- a/src/mesa/main/syncobj.c
+++ b/src/mesa/main/syncobj.c
@@ -71,7 +71,7 @@
 static struct gl_sync_object *
 _mesa_new_sync_object(struct gl_context *ctx, GLenum type)
 {
-   struct gl_sync_object *s = MALLOC_STRUCT(gl_sync_object);
+   struct gl_sync_object *s = CALLOC_STRUCT(gl_sync_object);
(void) ctx;
(void) type;
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/9] i965/gs: Set control data header size/format appropriately for EndPrimitive().

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:20 AM, Paul Berry wrote:
> The gen7 geometry shader uses a "control data header" at the beginning
> of the output URB entry to store either
> 
> (a) flag bits (1 bit/vertex) indicating whether EndPrimitive() was
> called after each vertex, or
> 
> (b) stream ID bits (2 bits/vertex) indicating which stream each vertex
> should be sent to (when multiple transform feedback streams are in
> use).
> 
> Fortunately, OpenGL only requires separate streams to be supported
> when the output type is points, and EndPrimitive() only has an effect
> when the input type is line_strip or triangle_strip, so it's not a
> problem that these two uses of the control data header are mutually
> exclusive.
> 
> This patch modifies do_vec4_gs_prog() to determine the correct
> hardware settings for configuring the control data header, and
> modifies upload_gs_state() to propagate these settings to the
> hardware.
> 
> In addition, it modifies do_vec4_gs_prog() to ensure that the output
> URB entry is large enough to contain both the output vertices *and*
> the control data header.
> 
> Finally, it modifies vec4_gs_visitor so that it accounts for the size
> of the control data header when computing the offset within the URB
> where output vertex data should be stored.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   | 14 ++
>  src/mesa/drivers/dri/i965/brw_defines.h   |  4 +++
>  src/mesa/drivers/dri/i965/brw_vec4_gs.c   | 33 
> +++
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp |  1 +
>  src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h   |  3 +++
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp|  2 +-
>  src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++
>  7 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 57f086b..c566bba 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -548,6 +548,20 @@ struct brw_gs_prog_data
> unsigned output_vertex_size_hwords;
>  
> unsigned output_topology;
> +
> +   /**
> +* Size of the control data (cut bits or StreamID bits), in hwords (32
> +* bytes).  0 if there is no control data.
> +*/
> +   unsigned control_data_header_size_hwords;
> +
> +   /**
> +* Format of the control data (either 
> GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
> +* if the control data is StreamID bits, or
> +* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
> +* Ignored if control_data_header_size is 0.
> +*/
> +   unsigned control_data_format;
>  };
>  
>  /** Number of texture sampler units */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 0406c4d..6db2570 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1337,6 +1337,10 @@ enum brw_message_target {
>  /* DW5 */
>  # define GEN6_GS_MAX_THREADS_SHIFT   25
>  # define HSW_GS_MAX_THREADS_SHIFT24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_SHIFT   24
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT   0
> +# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID   1
> +# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT  20

This won't work for Haswell (note the overlap GSCTL and MAX_THREADS).

Apparently GSCTL is stored in DW6 at bit 31 on Haswell.

I think it probably makes sense to address that in this patch.

Otherwise this looks fine.

>  # define GEN7_GS_DISPATCH_MODE_SINGLE(0 << 11)
>  # define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
>  # define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT   (2 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
> b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> index 7ab03ac..f67ae2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
> @@ -62,6 +62,38 @@ do_gs_prog(struct brw_context *brw,
> c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
> c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
> param_count);
>  
> +   if (gp->program.OutputType == GL_POINTS) {
> +  /* When the output type is points, the geometry shader may output data
> +   * to multiple streams, and EndPrimitive() has no effect.  So we
> +   * configure the hardware to interpret the control data as stream ID.
> +   */
> +  c.prog_data.control_data_format = 
> GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
> +
> +  /* However, StreamID is not yet supported, so we output zero bits of
> +   * control data per vertex.
> +   */
> +  c.control_data_bits_per_vertex = 0;
> +   } else {
> +  /* When the output type is triangle_strip or line_strip, EndPrimitive()
> +   * may be used to terminate the current st

[Mesa-dev] [PATCH] mesa: Use correct enum conversion function.

2013-09-09 Thread Vinson Lee
Fixes "Mixing enum types" defect reported by Coverity.

Signed-off-by: Vinson Lee 
---
 src/mesa/main/errors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/errors.c b/src/mesa/main/errors.c
index e1a9fe2..28357e0 100644
--- a/src/mesa/main/errors.c
+++ b/src/mesa/main/errors.c
@@ -856,7 +856,7 @@ _mesa_PushDebugGroup(GLenum source, GLuint id, GLsizei 
length,
   length = strlen(message);
emptySlot = &ctx->Debug.DebugGroupMsgs[ctx->Debug.GroupStackDepth];
store_message_details(emptySlot, gl_enum_to_debug_source(source),
- gl_enum_to_debug_source(GL_DEBUG_TYPE_PUSH_GROUP),
+ gl_enum_to_debug_type(GL_DEBUG_TYPE_PUSH_GROUP),
  id,
gl_enum_to_debug_severity(GL_DEBUG_SEVERITY_NOTIFICATION),
  length, message);
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] regression on nvc0 since floating point compare instructions

2013-09-09 Thread Dave Airlie
Hey,

so virgl stopped working on nouveau the other day and I bisected it to
the enable of the floating point compare instructions in the state
tracker,

I've attached a shader runner file that makes it hang,

Dave.


nouveau-float-compare-regression.shader_test
Description: Binary data
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] regression on nvc0 since floating point compare instructions

2013-09-09 Thread Dave Airlie
On Tue, Sep 10, 2013 at 11:59 AM, Dave Airlie  wrote:
> Hey,
>
> so virgl stopped working on nouveau the other day and I bisected it to
> the enable of the floating point compare instructions in the state
> tracker,
>
> I've attached a shader runner file that makes it hang,

As usual 5 secs after pressing send I had an insight,

the attached patch seems to fix it here for me.

Dave.


0001-nouveau-fix-regression-since-float-comparison-instru.patch
Description: Binary data
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi: Add parentheses around '|' operands.

2013-09-09 Thread Vinson Lee
Fixes GCC parentheses warning.

r600_texture.c: In function 'si_texture_create':
r600_texture.c:518:20: warning: suggest parentheses around arithmetic in 
operand of '|' [-Wparentheses]
  !(templ->bind & PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)) {
^

Fixes "Wrong operator used" defect reported by Coverity.

Signed-off-by: Vinson Lee 
---
 src/gallium/drivers/radeonsi/r600_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/r600_texture.c 
b/src/gallium/drivers/radeonsi/r600_texture.c
index ef4e865..e6e8e20 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -515,7 +515,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen 
*screen,
int r;
 
if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
-   !(templ->bind & PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)) {
+   !(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR))) {
if (templ->flags & R600_RESOURCE_FLAG_FORCE_TILING ||
templ->nr_samples > 1) {
array_mode = V_009910_ARRAY_2D_TILED_THIN1;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Add missing va_end in builtin_builder::add_function.

2013-09-09 Thread Vinson Lee
Fixes "Missing varargs init or cleanup" defect reported by Coverity.

Signed-off-by: Vinson Lee 
---
 src/glsl/builtin_functions.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 5d8f171..39127e6 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -1850,6 +1850,7 @@ builtin_builder::add_function(const char *name, ...)
 
   f->add_signature(sig);
}
+   va_end(ap);
 
shader->symbols->add_function(f);
 }
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: Initialize builtin_builder member variables.

2013-09-09 Thread Vinson Lee
Fixes "Uninitialized pointer field" defect reported by Coverity.

Signed-off-by: Vinson Lee 
---
 src/glsl/builtin_functions.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index 39127e6..ce78df1 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -525,6 +525,9 @@ private:
  *  @{
  */
 builtin_builder::builtin_builder()
+   : shader(NULL),
+ gl_ModelViewProjectionMatrix(NULL),
+ gl_Vertex(NULL)
 {
mem_ctx = NULL;
 }
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] regression on nvc0 since floating point compare instructions

2013-09-09 Thread Dave Airlie
On Tue, Sep 10, 2013 at 12:04 PM, Dave Airlie  wrote:
> On Tue, Sep 10, 2013 at 11:59 AM, Dave Airlie  wrote:
>> Hey,
>>
>> so virgl stopped working on nouveau the other day and I bisected it to
>> the enable of the floating point compare instructions in the state
>> tracker,
>>
>> I've attached a shader runner file that makes it hang,
>
> As usual 5 secs after pressing send I had an insight,
>
> the attached patch seems to fix it here for me.

Okay its a bit wierder than that, found another bunch of regressions,

Here's another shader test that regression from 9.2 to master on nvc0.

Dave.


nouveau-virgl-compare-regression.shader_test
Description: Binary data
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Chia-I Wu
On Tue, Sep 10, 2013 at 3:48 AM, Ian Romanick  wrote:
> On 09/05/2013 08:57 AM, Chia-I Wu wrote:
>> On Thu, Sep 5, 2013 at 5:12 PM, Chris Forbes  wrote:
>>> A possible explanation for the perf change is that Xonotic uses
>>> anisotropic filtering at this quality level. Lowering to txl defeats
>>> it.
>> I had a look at that.  gl_sampler->MaxAnisotropy is never greater than
>> 1.0 in gen7_update_sampler_state() so there is no anisotropic
>> filtering in this case.
>>
>> It makes sense to me that avoiding punting to SIMD8 helps the
>> performance.  But it is not clear to me why >10% performance change
>> can still be observed when INTEL_DEBUG=no16 is specified.  A
>> reasonable explanation is that the image quality is degraded in some
>> way, which is why I am still nervous about the change.
>>
>> An alternative approach to avoid punting seems to emulate SIMD16
>> sample_d with two SIMD8 sample_d.  It will take longer to implement
>> given my familiarity with the code, and may be less performant.  BUt
>> that would allow things like anisotropic filtering to be honored.
>>
>>
>>> It would be worth doing an image quality comparison before and after the 
>>> change.
>> Yeah, that is worth doing.  I will do that.
>
> Any results?  Still waiting...
There is no difference in image quality as far as I can tell.  Here
are the screenshots of every 100 frames before and after the change

  https://www.dropbox.com/s/mdqh0e42sf0xfro/compare-textureGrad-lowering.tar.gz
(297MB)

They are taken with the game's built-in mechanism and effects such as
bullets or explosions are off a bit between runs.

>>> -- Chris
>>>
>>> On Thu, Sep 5, 2013 at 8:35 PM, Chia-I Wu  wrote:
 sample_d is slower than the lowered version on gen7.  For gen7, this 
 improves
 Xonotic benchmark with Ultimate effects by as much as 25%:

  before the change:  40.06 fps
  after the change:   51.10 fps
  after the change with INTEL_DEBUG=no16: 44.46 fps

 As sample_d is not allowed in SIMD16 mode, I firstly thought the difference
 was from SIMD8 versus SIMD16.  If that was the case, we would want to apply
 brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.

 But, as the numbers show, there is still 10% improvement when SIMD16 is 
 forced
 off after the change.  Thus textureGrad() is lowered unconditionally for 
 now.
 Due to this and that I haven't tried it on Haswell, this is still RFC.

 No piglit regressions.

 Signed-off-by: Chia-I Wu 
 ---
  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
 ++
  1 file changed, 36 insertions(+), 18 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
 b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 index 1589a20..f3fcb56 100644
 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
 @@ -34,8 +34,8 @@ using namespace ir_builder;

  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
  public:
 -   lower_texture_grad_visitor(bool has_sample_d_c)
 -  : has_sample_d_c(has_sample_d_c)
 +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
 +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
 {
progress = false;
 }
 @@ -44,6 +44,7 @@ public:


 bool progress;
 +   bool has_sample_d;
 bool has_sample_d_c;

  private:
 @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
  ir_visitor_status
  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
  {
 -   /* Only lower textureGrad with shadow samplers */
 -   if (ir->op != ir_txd || !ir->shadow_comparitor)
 +   if (ir->op != ir_txd)
return visit_continue;

 -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
 sample_d_c
 -* message.  GLSL provides gradients for the 'r' coordinate.  
 Unfortunately:
 -*
 -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message 
 description:
 -* "The r coordinate contains the faceid, and the r gradients are 
 ignored
 -*  by hardware."
 -*
 -* We likely need to do a similar treatment for samplerCube and
 -* samplerCubeArray, but we have insufficient testing for that at the 
 moment.
 -*/
 -   bool need_lowering = !has_sample_d_c ||
 -  ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
 +   bool need_lowering = false;
 +
 +   if (ir->shadow_comparitor) {
 +  /* Lower textureGrad() with samplerCubeShadow even if we have the
 +   * sample_d_c message.  GLSL provides gradients for the 'r' 
 coordinate.
 +   * Unfortunately:
 +   *

Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Chia-I Wu
On Tue, Sep 10, 2013 at 4:01 AM, Ian Romanick  wrote:
> On 09/06/2013 05:05 AM, Chia-I Wu wrote:
>> On Thu, Sep 5, 2013 at 9:57 PM, Chia-I Wu  wrote:
>>> On Thu, Sep 5, 2013 at 5:12 PM, Chris Forbes  wrote:
 A possible explanation for the perf change is that Xonotic uses
 anisotropic filtering at this quality level. Lowering to txl defeats
 it.
>>> I had a look at that.  gl_sampler->MaxAnisotropy is never greater than
>>> 1.0 in gen7_update_sampler_state() so there is no anisotropic
>>> filtering in this case.
>>>
>>> It makes sense to me that avoiding punting to SIMD8 helps the
>>> performance.  But it is not clear to me why >10% performance change
>>> can still be observed when INTEL_DEBUG=no16 is specified.  A
>>> reasonable explanation is that the image quality is degraded in some
>>> way, which is why I am still nervous about the change.
>> With INTEL_DEBUG=no16 set, the same trick hurts the performance on
>> Haswell by about 5%.  That is, sample_d on Haswell is faster than the
>> one emulated with sample_l.
>
> What is the delta if sample_d is used for just SIMD8 shaders on HSW?
> Even when the shader can go SIMD16, some fragments will use the SIMD8 path.
brw_lower_texture_gradients applies on the IR so it is hard to
selectively apply it only for SIMD16 fs.  I will see if I can work
something out here to get the numbers you need.


>> But since the trick makes SIMD16 possible, it gains 5% more fps when
>> INTEL_DEBUG=no16 is not set.
>>
>>> An alternative approach to avoid punting seems to emulate SIMD16
>>> sample_d with two SIMD8 sample_d.  It will take longer to implement
>>> given my familiarity with the code, and may be less performant.  BUt
>>> that would allow things like anisotropic filtering to be honored.
>>>
>>>
 It would be worth doing an image quality comparison before and after the 
 change.
>>> Yeah, that is worth doing.  I will do that.
>>>

 -- Chris

 On Thu, Sep 5, 2013 at 8:35 PM, Chia-I Wu  wrote:
> sample_d is slower than the lowered version on gen7.  For gen7, this 
> improves
> Xonotic benchmark with Ultimate effects by as much as 25%:
>
>  before the change:  40.06 fps
>  after the change:   51.10 fps
>  after the change with INTEL_DEBUG=no16: 44.46 fps
>
> As sample_d is not allowed in SIMD16 mode, I firstly thought the 
> difference
> was from SIMD8 versus SIMD16.  If that was the case, we would want to 
> apply
> brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.
>
> But, as the numbers show, there is still 10% improvement when SIMD16 is 
> forced
> off after the change.  Thus textureGrad() is lowered unconditionally for 
> now.
> Due to this and that I haven't tried it on Haswell, this is still RFC.
>
> No piglit regressions.
>
> Signed-off-by: Chia-I Wu 
> ---
>  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
> ++
>  1 file changed, 36 insertions(+), 18 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> index 1589a20..f3fcb56 100644
> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
> @@ -34,8 +34,8 @@ using namespace ir_builder;
>
>  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
>  public:
> -   lower_texture_grad_visitor(bool has_sample_d_c)
> -  : has_sample_d_c(has_sample_d_c)
> +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
> +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
> {
>progress = false;
> }
> @@ -44,6 +44,7 @@ public:
>
>
> bool progress;
> +   bool has_sample_d;
> bool has_sample_d_c;
>
>  private:
> @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
>  ir_visitor_status
>  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>  {
> -   /* Only lower textureGrad with shadow samplers */
> -   if (ir->op != ir_txd || !ir->shadow_comparitor)
> +   if (ir->op != ir_txd)
>return visit_continue;
>
> -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
> sample_d_c
> -* message.  GLSL provides gradients for the 'r' coordinate.  
> Unfortunately:
> -*
> -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message 
> description:
> -* "The r coordinate contains the faceid, and the r gradients are 
> ignored
> -*  by hardware."
> -*
> -* We likely need to do a similar treatment for samplerCube and
> -* samplerCubeArray, but we have insufficient testing for that at the 
> moment.
> -*/
> -   bool need_lo

Re: [Mesa-dev] [PATCH] i965/gen7: always lower textureGrad() on gen7

2013-09-09 Thread Chia-I Wu
On Tue, Sep 10, 2013 at 4:05 AM, Ian Romanick  wrote:
> On 09/05/2013 03:35 AM, Chia-I Wu wrote:
>> sample_d is slower than the lowered version on gen7.  For gen7, this improves
>> Xonotic benchmark with Ultimate effects by as much as 25%:
>>
>>  before the change:  40.06 fps
>>  after the change:   51.10 fps
>>  after the change with INTEL_DEBUG=no16: 44.46 fps
>>
>> As sample_d is not allowed in SIMD16 mode, I firstly thought the difference
>> was from SIMD8 versus SIMD16.  If that was the case, we would want to apply
>> brw_lower_texture_gradients() only on fragment shaders in SIMD16 mode.
>>
>> But, as the numbers show, there is still 10% improvement when SIMD16 is 
>> forced
>> off after the change.  Thus textureGrad() is lowered unconditionally for now.
>> Due to this and that I haven't tried it on Haswell, this is still RFC.
>
> A lot of this code depends on the texture targets being used.  What
> texture targets is Xonotic using with textureGrad?
Only sampler2D.
>
>> No piglit regressions.
>>
>> Signed-off-by: Chia-I Wu 
>> ---
>>  .../dri/i965/brw_lower_texture_gradients.cpp   | 54 
>> ++
>>  1 file changed, 36 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp 
>> b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>> index 1589a20..f3fcb56 100644
>> --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
>> @@ -34,8 +34,8 @@ using namespace ir_builder;
>>
>>  class lower_texture_grad_visitor : public ir_hierarchical_visitor {
>>  public:
>> -   lower_texture_grad_visitor(bool has_sample_d_c)
>> -  : has_sample_d_c(has_sample_d_c)
>> +   lower_texture_grad_visitor(bool has_sample_d, bool has_sample_d_c)
>> +  : has_sample_d(has_sample_d), has_sample_d_c(has_sample_d_c)
>> {
>>progress = false;
>> }
>> @@ -44,6 +44,7 @@ public:
>>
>>
>> bool progress;
>> +   bool has_sample_d;
>> bool has_sample_d_c;
>>
>>  private:
>> @@ -90,22 +91,33 @@ txs_type(const glsl_type *type)
>>  ir_visitor_status
>>  lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>>  {
>> -   /* Only lower textureGrad with shadow samplers */
>> -   if (ir->op != ir_txd || !ir->shadow_comparitor)
>> +   if (ir->op != ir_txd)
>>return visit_continue;
>>
>> -   /* Lower textureGrad() with samplerCubeShadow even if we have the 
>> sample_d_c
>> -* message.  GLSL provides gradients for the 'r' coordinate.  
>> Unfortunately:
>> -*
>> -* From the Ivybridge PRM, Volume 4, Part 1, sample_d message 
>> description:
>> -* "The r coordinate contains the faceid, and the r gradients are ignored
>> -*  by hardware."
>> -*
>> -* We likely need to do a similar treatment for samplerCube and
>> -* samplerCubeArray, but we have insufficient testing for that at the 
>> moment.
>> -*/
>> -   bool need_lowering = !has_sample_d_c ||
>> -  ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
>> +   bool need_lowering = false;
>> +
>> +   if (ir->shadow_comparitor) {
>> +  /* Lower textureGrad() with samplerCubeShadow even if we have the
>> +   * sample_d_c message.  GLSL provides gradients for the 'r' 
>> coordinate.
>> +   * Unfortunately:
>> +   *
>> +   * From the Ivybridge PRM, Volume 4, Part 1, sample_d message
>> +   * description: "The r coordinate contains the faceid, and the r
>> +   * gradients are ignored by hardware."
>> +   */
>> +  if (ir->sampler->type->sampler_dimensionality == 
>> GLSL_SAMPLER_DIM_CUBE)
>> + need_lowering = true;
>> +  else if (!has_sample_d_c)
>> + need_lowering = true;
>
> This should look like the old code:
>
> need_lowering = !has_sample_d_c ||
>ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
Sure.  I moved it so that it is clear the comments are for the first if-block.
>> +   }
>> +   else {
>> +  /* We likely need to do a similar treatment for samplerCube and
>> +   * samplerCubeArray, but we have insufficient testing for that at the
>> +   * moment.
>> +   */
>> +  if (!has_sample_d)
>> + need_lowering = true;
>
> need_lowering = !has_sample_d;
Will do.
>
>> +   }
>>
>> if (!need_lowering)
>>return visit_continue;
>> @@ -154,7 +166,9 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
>>  expr(ir_unop_sqrt, dot(dPdy, dPdy)));
>> }
>>
>> -   /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
>> +   /* lambda_base = log2(rho).  It will be biased and clamped by values
>> +* defined in SAMPLER_STATE to get the final lambda.
>> +*/
>> ir->op = ir_txl;
>> ir->lod_info.lod = expr(ir_unop_log2, rho);
>>
>> @@ -168,8 +182,12 @@ bool
>>  brw_lower_texture_gradients(struct brw_context *brw,
>>  s

Re: [Mesa-dev] [PATCH] glsl: Initialize builtin_builder member variables.

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:48 PM, Vinson Lee wrote:
> Fixes "Uninitialized pointer field" defect reported by Coverity.
> 
> Signed-off-by: Vinson Lee 
> ---
>  src/glsl/builtin_functions.cpp | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index 39127e6..ce78df1 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -525,6 +525,9 @@ private:
>   *  @{
>   */
>  builtin_builder::builtin_builder()
> +   : shader(NULL),
> + gl_ModelViewProjectionMatrix(NULL),
> + gl_Vertex(NULL)
>  {
> mem_ctx = NULL;
>  }

I don't think this should matter, but it shouldn't hurt anything either.

Reviewed-by: Kenneth Graunke 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: Add missing va_end in builtin_builder::add_function.

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:34 PM, Vinson Lee wrote:
> Fixes "Missing varargs init or cleanup" defect reported by Coverity.
> 
> Signed-off-by: Vinson Lee 
> ---
>  src/glsl/builtin_functions.cpp | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index 5d8f171..39127e6 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -1850,6 +1850,7 @@ builtin_builder::add_function(const char *name, ...)
>  
>f->add_signature(sig);
> }
> +   va_end(ap);
>  
> shader->symbols->add_function(f);
>  }
> 

Oops.   Thanks for catching this!

Reviewed-by: Kenneth Graunke 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/9] i965/gs: Add opcodes needed for EndPrimitive().

2013-09-09 Thread Kenneth Graunke
On 09/09/2013 08:20 AM, Paul Berry wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h | 21 
>  src/mesa/drivers/dri/i965/brw_shader.cpp|  4 ++
>  src/mesa/drivers/dri/i965/brw_vec4.h|  2 +
>  src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 81 
> +
>  4 files changed, 108 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4742103..7b53c68 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -847,6 +847,27 @@ enum opcode {
>  * scratch reads and writes to operate correctly.
>  */
> GS_OPCODE_SET_DWORD_2_IMMED,
> +
> +   /**
> +* Prepare the dst register for storage in the "Channel Mask" fields of a
> +* URB_WRITE message header.
> +*
> +* DWORD 4 of dst is shifted left by 4 bits, so that later,
> +* GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
> +* final channel mask.
> +*/
> +   GS_OPCODE_PREPARE_CHANNEL_MASKS,
> +
> +   /**
> +* Set the "Channel Mask" fields of a URB_WRITE message header.
> +*
> +* - dst is the MRF containing the message header.
> +*
> +* - src.x is the channel mask, as prepared by
> +*   GS_OPCODE_PREPARE_CHANNEL_MASKS.  DWORDs 0 and 4 are OR'ed together 
> to
> +*   form the final channel mask.
> +*/
> +   GS_OPCODE_SET_CHANNEL_MASKS,
>  };
>  
>  #define BRW_PREDICATE_NONE 0
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index e7dbdbe..53364a5 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -507,6 +507,10 @@ brw_instruction_name(enum opcode op)
>return "set_vertex_count";
> case GS_OPCODE_SET_DWORD_2_IMMED:
>return "set_dword_2_immed";
> +   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
> +  return "prepare_channel_masks";
> +   case GS_OPCODE_SET_CHANNEL_MASKS:
> +  return "set_channel_masks";
>  
> default:
>/* Yes, this leaks.  It's in debug code, it should never occur, and if
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
> b/src/mesa/drivers/dri/i965/brw_vec4.h
> index c5101d3..cba5cd4 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -610,6 +610,8 @@ private:
> void generate_gs_set_vertex_count(struct brw_reg dst,
>   struct brw_reg src);
> void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg 
> src);
> +   void generate_gs_prepare_channel_masks(struct brw_reg dst);
> +   void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg 
> src);
> void generate_oword_dual_block_offsets(struct brw_reg m1,
> struct brw_reg index);
> void generate_scratch_write(vec4_instruction *inst,
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> index bf04bd9..12e1b50 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
> @@ -516,6 +516,79 @@ vec4_generator::generate_gs_set_dword_2_immed(struct 
> brw_reg dst,
>  }
>  
>  void
> +vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst)
> +{
> +   /* We want to left shift just DWORD 4 (the x component belonging to the
> +* second geometry shader invocation) by 4 bits.  So generate the
> +* instruction:
> +*
> +* shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all }
> +*/
> +   dst = suboffset(vec1(dst), 4);
> +   brw_push_insn_state(p);
> +   brw_set_access_mode(p, BRW_ALIGN_1);
> +   brw_set_mask_control(p, BRW_MASK_DISABLE);
> +   brw_SHL(p, dst, dst, brw_imm_ud(4));
> +   brw_pop_insn_state(p);
> +}
> +
> +void
> +vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst,
> +  struct brw_reg src)
> +{
> +   /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message
> +* Header: M0.5):
> +*
> +* 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask
> +*
> +*When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 
> 1
> +*DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls
> +*Vertex 0 DATA[7].  This bit is ANDed with the corresponding
> +*channel enable to determine the final channel enable.  For the
> +*URB_READ_OWORD & URB_READ_HWORD messages, when final channel
> +*enable is 1 it indicates that Vertex 1 DATA [3] will be included
> +*in the writeback message.  For the URB_WRITE_OWORD &
> +*URB_WRITE_HWORD messages, when final channel enable is 1 it
> +*indicates that Vertex 1 DATA [3] will be written to the surface.
> +*
> +*0: Vertex 1 DATA [3] / Vertex 0 DATA[7]