Re: [Mesa-dev] [PATCH] u_vbuf: fix vb slot assignment for translated buffers

2015-10-12 Thread Nicolai Hähnle

On 09.10.2015 23:57, Marek Olšák wrote:

Do you still have commit access or should somebody else push this?


I have to figure out how to get somebody to pay attention to 
https://bugs.freedesktop.org/show_bug.cgi?id=92281


Feel free to push this in the meantime; otherwise I'll eventually do it 
once I have access again.


Nicolai



Marek

On Sun, Oct 4, 2015 at 2:19 PM, Marek Olšák  wrote:

Oh, I forgot this:

Reviewed-by: Marek Olšák 

Marek

On Sun, Oct 4, 2015 at 2:03 PM, Marek Olšák  wrote:

Nice catch. Please add this to the commit message:

Cc: mesa-stable@lists.freedesktop.org

It will be automatically picked for 11.0 after you push it.

Marek

On Sun, Oct 4, 2015 at 12:09 PM, Nicolai Hähnle  wrote:

Vertex attributes of different categories (constant/per-instance/
per-vertex) go into different buffers for translation, and this is now
properly reflected in the vertex buffers passed to the driver.

Fixes e.g. piglit's point-vertex-id divisor test.
---
  src/gallium/auxiliary/util/u_vbuf.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 3d2193c..b31ada1 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -544,6 +544,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,

   index = ffs(unused_vb_mask) - 1;
   fallback_vbs[type] = index;
+ unused_vb_mask &= ~(1 << index);
   /*printf("found slot=%i for type=%i\n", index, type);*/
}
 }
--
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: clamp MaxLevel for immutable textures at initialization

2015-10-22 Thread Nicolai Hähnle
The same clamping already happens for glTexParameteri. This change
also fixes a bug in mipmap generation, see
https://bugs.freedesktop.org/show_bug.cgi?id=91993

piglit test cases have been submitted for review (as additions to
arb_texture_storage-texture-storage and arb_texture_view-max-level).
---
 src/mesa/main/textureview.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/main/textureview.c b/src/mesa/main/textureview.c
index 04b7d73..b6eaa77 100644
--- a/src/mesa/main/textureview.c
+++ b/src/mesa/main/textureview.c
@@ -408,6 +408,8 @@ _mesa_set_texture_view_state(struct gl_context *ctx,
   texObj->NumLayers = 6;
   break;
}
+
+   texObj->MaxLevel = MIN2(texObj->MaxLevel, texObj->ImmutableLevels - 1);
 }
 
 /**
@@ -680,6 +682,7 @@ _mesa_TextureView(GLuint texture, GLenum target, GLuint 
origtexture,
texObj->NumLayers = newViewNumLayers;
texObj->Immutable = GL_TRUE;
texObj->ImmutableLevels = origTexObj->ImmutableLevels;
+   texObj->MaxLevel = MIN2(texObj->MaxLevel, texObj->ImmutableLevels - 1);
texObj->Target = target;
texObj->TargetIndex = _mesa_tex_target_to_index(ctx, target);
assert(texObj->TargetIndex < NUM_TEXTURE_TARGETS);
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/40] pipe-loader: wire up the 'static' drm pipe-loader

2015-10-22 Thread Nicolai Hähnle

On 18.10.2015 00:57, Emil Velikov wrote:

Add a list of driver descriptors and select one from the list, during
probe time.

As we'll need to have all the driver pipe_foo_screen_create() functions
provided externally (i.e. from another static lib) we need a separate
(non-inline) drm_helper, which contains the function declarations.

XXX: More than happy to rename things - header/functions/etc.

Signed-off-by: Emil Velikov 
---
  src/gallium/auxiliary/pipe-loader/Makefile.am  |   6 +-
  .../auxiliary/pipe-loader/pipe_loader_drm.c| 119 -
  .../auxiliary/target-helpers/drm_helper_public.h   |  34 ++
  3 files changed, 154 insertions(+), 5 deletions(-)
  create mode 100644 src/gallium/auxiliary/target-helpers/drm_helper_public.h

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am 
b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 6a4a667..7db4190 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -34,12 +34,12 @@ AM_CFLAGS += \
  libpipe_loader_static_la_SOURCES += \
$(DRM_SOURCES)

-libpipe_loader_dynamic_la_SOURCES += \
-   $(DRM_SOURCES)
-
  libpipe_loader_static_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la

+libpipe_loader_dynamic_la_SOURCES += \
+   $(DRM_SOURCES)
+
  libpipe_loader_dynamic_la_LIBADD = \
$(top_builddir)/src/loader/libloader.la

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 33274de..97e9dcb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
  #include 

  #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
  #include "state_tracker/drm_driver.h"
  #include "pipe_loader_priv.h"

@@ -51,7 +52,9 @@
  struct pipe_loader_drm_device {
 struct pipe_loader_device base;
 const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
 struct util_dl_library *lib;
+#endif
 int fd;
  };

@@ -59,6 +62,103 @@ struct pipe_loader_drm_device {

  static const struct pipe_loader_ops pipe_loader_drm_ops;

+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+  return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+  return &share_fd_ret;
+   default:
+  break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+{
+.name = "i915",
+.driver_name = "i915",
+.create_screen = pipe_i915_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "i965",
+.driver_name = "i915",
+.create_screen = pipe_ilo_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "nouveau",
+.driver_name = "nouveau",
+.create_screen = pipe_nouveau_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r300",
+.driver_name = "radeon",
+.create_screen = pipe_r300_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r600",
+.driver_name = "radeon",
+.create_screen = pipe_r600_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "radeonsi",
+.driver_name = "radeon",
+.create_screen = pipe_radeonsi_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vmwgfx",
+.driver_name = "vmwgfx",
+.create_screen = pipe_vmwgfx_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "kgsl",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "msm",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vc4",
+.driver_name = "vc4",
+.create_screen = pipe_vc4_create_screen,
+.configuration = configuration_query,
+},


I believe these should be guarded by the respective #if 
defined(GALLIUM_xxx).


I see that in patch 25 (target-helpers: add a non-inline drm_helper.h) 
you change the pipe_XXX_create_screen functions so that they return NULL 
if the corresponding driver has not been configured.


However, using #if guards here instead is bound to provide a clearer 
distinction between the "create_screen failed" and "driver missing" 
failure modes.


Cheers,
Nicolai


+#ifdef USE_VC4_SIMULATOR
+{
+.name = "i

Re: [Mesa-dev] [PATCH] mesa: clamp MaxLevel for immutable textures at initialization

2015-10-22 Thread Nicolai Hähnle

On 22.10.2015 15:57, Fredrik Höglund wrote:

On Thursday 22 October 2015, Nicolai Hähnle wrote:

The same clamping already happens for glTexParameteri. This change
also fixes a bug in mipmap generation, see
https://bugs.freedesktop.org/show_bug.cgi?id=91993


I don't think this patch is correct.  The ARB_texture_view specification
doesn't say that MaxLevel should be initialized to the value of
TEXTURE_IMMUTABLE_LEVELS, only that it's interpreted relative to
the view and not relative to the original data store.

Liam Middlebrook also pointed out recently that the clamping done
in glTexParameteri is in fact a bug:

http://lists.freedesktop.org/archives/piglit/2015-June/016342.html

The language in the specification that says that MaxLevel is clamped
when the texture is immutable applies to texture minification,
magnification, and texture completeness; not to gl*Tex*Parameter*.


Ugh. I was torn between those two interpretations. I suppose nobody was 
confident enough to change gl*Tex*Parameter* either ;)


Thinking more on this, there is also a problematic interaction between 
glTextureView and glGenerate*Mipmap when the view does not extend to the 
highest level in the underlying texture. Clearly, this part of the spec 
could use some cleanups.


Any chance of an "official" clarification? I did not find corresponding 
Issues in the corresponding extensions. What are non-Mesa drivers doing?


Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/40] pipe-loader: wire up the 'static' drm pipe-loader

2015-10-22 Thread Nicolai Hähnle

On 22.10.2015 17:32, Emil Velikov wrote:

On 22 October 2015 at 15:07, Nicolai Hähnle  wrote:

On 18.10.2015 00:57, Emil Velikov wrote:


Add a list of driver descriptors and select one from the list, during
probe time.

As we'll need to have all the driver pipe_foo_screen_create() functions
provided externally (i.e. from another static lib) we need a separate
(non-inline) drm_helper, which contains the function declarations.

XXX: More than happy to rename things - header/functions/etc.

Signed-off-by: Emil Velikov 
---
   src/gallium/auxiliary/pipe-loader/Makefile.am  |   6 +-
   .../auxiliary/pipe-loader/pipe_loader_drm.c| 119
-
   .../auxiliary/target-helpers/drm_helper_public.h   |  34 ++
   3 files changed, 154 insertions(+), 5 deletions(-)
   create mode 100644
src/gallium/auxiliary/target-helpers/drm_helper_public.h

diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am
b/src/gallium/auxiliary/pipe-loader/Makefile.am
index 6a4a667..7db4190 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -34,12 +34,12 @@ AM_CFLAGS += \
   libpipe_loader_static_la_SOURCES += \
 $(DRM_SOURCES)

-libpipe_loader_dynamic_la_SOURCES += \
-   $(DRM_SOURCES)
-
   libpipe_loader_static_la_LIBADD = \
 $(top_builddir)/src/loader/libloader.la

+libpipe_loader_dynamic_la_SOURCES += \
+   $(DRM_SOURCES)
+
   libpipe_loader_dynamic_la_LIBADD = \
 $(top_builddir)/src/loader/libloader.la

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 33274de..97e9dcb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
   #include 

   #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
   #include "state_tracker/drm_driver.h"
   #include "pipe_loader_priv.h"

@@ -51,7 +52,9 @@
   struct pipe_loader_drm_device {
  struct pipe_loader_device base;
  const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
  struct util_dl_library *lib;
+#endif
  int fd;
   };

@@ -59,6 +62,103 @@ struct pipe_loader_drm_device {

   static const struct pipe_loader_ops pipe_loader_drm_ops;

+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+  return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+  return &share_fd_ret;
+   default:
+  break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+{
+.name = "i915",
+.driver_name = "i915",
+.create_screen = pipe_i915_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "i965",
+.driver_name = "i915",
+.create_screen = pipe_ilo_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "nouveau",
+.driver_name = "nouveau",
+.create_screen = pipe_nouveau_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r300",
+.driver_name = "radeon",
+.create_screen = pipe_r300_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "r600",
+.driver_name = "radeon",
+.create_screen = pipe_r600_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "radeonsi",
+.driver_name = "radeon",
+.create_screen = pipe_radeonsi_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vmwgfx",
+.driver_name = "vmwgfx",
+.create_screen = pipe_vmwgfx_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "kgsl",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "msm",
+.driver_name = "freedreno",
+.create_screen = pipe_freedreno_create_screen,
+.configuration = configuration_query,
+},
+{
+.name = "vc4",
+.driver_name = "vc4",
+.create_screen = pipe_vc4_create_screen,
+.configuration = configuration_query,
+},



I believe these should be guarded by the respective #if
defined(GALLIUM_xxx).

I see that in patch 25 (target-helpers: add a non-inline 

[Mesa-dev] [PATCH] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-22 Thread Nicolai Hähnle
(This is an alternative to my previous patch, "mesa: clamp MaxLevel for
immutable textures at initialization"; this patch has no opinion about
how the spec should be interpreted.)

Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).
---
 src/mesa/state_tracker/st_gen_mipmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/state_tracker/st_gen_mipmap.c 
b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21..3125b2a 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
 
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+   if (texObj->Immutable)
+  numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
 
return numLevels;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gallivm: disable f16c when not using AVX

2015-10-24 Thread Nicolai Hähnle

On 23.10.2015 23:26, srol...@vmware.com wrote:

From: Roland Scheidegger 

f16c intrinsic can only be emitted when AVX is used. So when we disable AVX
due to forcing 128bit vectors we must not use this intrinsic (depending on
llvm version, this worked previously because llvm used AVX even when we didn't
tell it to, however I've seen this fail with llvm 3.3 since
718249843b915decf8fccec92e466ac1a6219934 which seems to have the side effect
of disabling avx in llvm albeit it only touches sse flags really).
Possibly one day we should actually try to use avx even with 128bit vectors...


Reviewed-by: Nicolai Hähnle 


---
  src/gallium/auxiliary/gallivm/lp_bld_init.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 017d075..e6eede8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -427,6 +427,7 @@ lp_build_init(void)
 */
util_cpu_caps.has_avx = 0;
util_cpu_caps.has_avx2 = 0;
+  util_cpu_caps.has_f16c = 0;
 }

  #ifdef PIPE_ARCH_PPC_64



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] radeonsi: properly check if DCC is enabled and allocated

2015-10-24 Thread Nicolai Hähnle

On 24.10.2015 17:49, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeon/r600_texture.c | 2 +-
  src/gallium/drivers/radeonsi/cik_sdma.c   | 2 +-
  src/gallium/drivers/radeonsi/si_blit.c| 6 +++---
  src/gallium/drivers/radeonsi/si_dma.c | 2 +-
  src/gallium/drivers/radeonsi/si_state.c   | 4 ++--
  5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index f7a11a2..40075ae 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1367,7 +1367,7 @@ void evergreen_do_fast_color_clear(struct 
r600_common_context *rctx,
continue;
}

-   if (tex->surface.dcc_enabled) {
+   if (tex->dcc_buffer) {
uint32_t reset_value;
bool clear_words_needed;

diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c 
b/src/gallium/drivers/radeonsi/cik_sdma.c
index 25fd09a..e53af1d 100644
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -243,7 +243,7 @@ void cik_sdma_copy(struct pipe_context *ctx,
if (src->format != dst->format ||
rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << 
dst_level) ||
-   rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+   rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index a226436..302b75c 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -326,7 +326,7 @@ void si_decompress_color_textures(struct si_context *sctx,
assert(view);

tex = (struct r600_texture *)view->texture;
-   assert(tex->cmask.size || tex->fmask.size || 
tex->surface.dcc_enabled);
+   assert(tex->cmask.size || tex->fmask.size || tex->dcc_buffer);

si_blit_decompress_color(&sctx->b.b, tex,
 view->u.tex.first_level, 
view->u.tex.last_level,
@@ -455,7 +455,7 @@ static void si_decompress_subresource(struct pipe_context 
*ctx,
si_blit_decompress_depth_in_place(sctx, rtex, true,
  level, level,
  first_layer, 
last_layer);
-   } else if (rtex->fmask.size || rtex->cmask.size || 
rtex->surface.dcc_enabled) {
+   } else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_buffer) {
si_blit_decompress_color(ctx, rtex, level, level,
 first_layer, last_layer);
}
@@ -676,7 +676,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context 
*ctx,
dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
!(dst->surface.flags & RADEON_SURF_SCANOUT) &&
(!dst->cmask.size || !dst->dirty_level_mask) && /* dst cannot be 
fast-cleared */
-   !dst->surface.dcc_enabled) {
+   !dst->dcc_buffer) {
si_blitter_begin(ctx, SI_COLOR_RESOLVE |
 (info->render_condition_enable ? 0 : 
SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter,
diff --git a/src/gallium/drivers/radeonsi/si_dma.c 
b/src/gallium/drivers/radeonsi/si_dma.c
index 73c026c..581e89f 100644
--- a/src/gallium/drivers/radeonsi/si_dma.c
+++ b/src/gallium/drivers/radeonsi/si_dma.c
@@ -249,7 +249,7 @@ void si_dma_copy(struct pipe_context *ctx,
(rdst->dirty_level_mask | rdst->stencil_dirty_level_mask) & (1 << 
dst_level) ||
rdst->cmask.size || rdst->fmask.size ||
rsrc->cmask.size || rsrc->fmask.size ||
-   rdst->surface.dcc_enabled || rsrc->surface.dcc_enabled) {
+   rdst->dcc_buffer || rsrc->dcc_buffer) {
goto fallback;
}

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index c87f661..18b6405 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,7 +1926,7 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;

-   if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
+   if (sctx->b.chip_class >= VI && rtex->dcc_buffer) {
unsigned max_uncompressed_block_size = 2;
uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;

@@ -2655,7 +2655,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
view->state[5] = (S_008F24_BASE_ARRAY(state->u.tex.first_layer) |
  S

Re: [Mesa-dev] [PATCH 1/3] radeonsi: simplify DCC handling in si_initialize_color_surface

2015-10-24 Thread Nicolai Hähnle
With the remark on patch 2 (radeonsi: properly check if DCC is enabled 
and allocated), the series is


Reviewed-by: Nicolai Hähnle 

On 24.10.2015 17:49, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_state.c | 10 +++---
  1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 384c8e2..c87f661 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1926,8 +1926,9 @@ static void si_initialize_color_surface(struct si_context 
*sctx,
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;

-   if (sctx->b.chip_class >= VI) {
+   if (sctx->b.chip_class >= VI && rtex->surface.dcc_enabled) {
unsigned max_uncompressed_block_size = 2;
+   uint64_t dcc_offset = rtex->surface.level[level].dcc_offset;

if (rtex->surface.nsamples > 1) {
if (rtex->surface.bpe == 1)
@@ -1938,12 +1939,7 @@ static void si_initialize_color_surface(struct 
si_context *sctx,

surf->cb_dcc_control = 
S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
   S_028C78_INDEPENDENT_64B_BLOCKS(1);
-
-   if (rtex->surface.dcc_enabled) {
-   uint64_t dcc_offset = 
rtex->surface.level[level].dcc_offset;
-
-   surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + 
dcc_offset) >> 8;
-   }
+   surf->cb_dcc_base = (rtex->dcc_buffer->gpu_address + dcc_offset) 
>> 8;
}

if (rtex->fmask.size) {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] winsys/amdgpu: remove the dcc_enable surface flag

2015-10-26 Thread Nicolai Hähnle

On 26.10.2015 11:41, Marek Olšák wrote:

From: Marek Olšák 

dcc_size is sufficient and doesn't need a further comment in my opinion.
---
  src/gallium/drivers/radeon/r600_texture.c  |  3 +--
  src/gallium/drivers/radeon/radeon_winsys.h |  1 -
  src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 13 ++---
  3 files changed, 7 insertions(+), 10 deletions(-)


Agreed, this is an even better solution.

Reviewed-by: Nicolai Hähnle 



diff --git a/src/gallium/drivers/radeon/r600_texture.c 
b/src/gallium/drivers/radeon/r600_texture.c
index 789c66f..edfdfe3 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -641,9 +641,8 @@ r600_texture_create_object(struct pipe_screen *screen,
return NULL;
}
}
-   if (rtex->surface.dcc_enabled) {
+   if (rtex->surface.dcc_size)
vi_texture_alloc_dcc_separate(rscreen, rtex);
-   }
}

/* Now create the backing buffer. */
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index 0178643..8bf1e15 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -371,7 +371,6 @@ struct radeon_surf {

  uint64_tdcc_size;
  uint64_tdcc_alignment;
-booldcc_enabled;
  };

  struct radeon_bo_list_item {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index b442174..3006bd1 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -251,7 +251,7 @@ static int compute_level(struct amdgpu_winsys *ws,

 surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;

-   if (surf->dcc_enabled) {
+   if (AddrSurfInfoIn->flags.dccCompatible) {
AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
@@ -267,10 +267,11 @@ static int compute_level(struct amdgpu_winsys *ws,
   surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
   surf->dcc_alignment = MAX2(surf->dcc_alignment, 
AddrDccOut->dccRamBaseAlign);
} else {
- surf->dcc_enabled = false;
+ surf->dcc_size = 0;
   surf_level->dcc_offset = 0;
}
 } else {
+  surf->dcc_size = 0;
surf_level->dcc_offset = 0;
 }

@@ -354,10 +355,6 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
 AddrSurfInfoIn.tileIndex = -1;

-   surf->dcc_enabled =  !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
-!(surf->flags & RADEON_SURF_SCANOUT) &&
-!compressed && AddrDccIn.numSamples <= 1;
-
 /* Set the micro tile type. */
 if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
@@ -373,7 +370,9 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
 AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
 AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
 AddrSurfInfoIn.flags.degrade4Space = 1;
-   AddrSurfInfoIn.flags.dccCompatible = surf->dcc_enabled;
+   AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) 
&&
+!(surf->flags & RADEON_SURF_SCANOUT) &&
+!compressed && AddrDccIn.numSamples <= 
1;

 /* This disables incorrect calculations (hacks) in addrlib. */
 AddrSurfInfoIn.flags.noStencil = 1;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: Fix special negative immediate constants when using ABS modifier.

2015-10-26 Thread Nicolai Hähnle

Hi Ivan,

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG modifier.

However, some shaders define a -1.0 constant and want to use it as 1.0.
They do so by using the ABS modifier. But r600_bytecode_special_constants()
sets NEG in addition to ABS. Since the NEG modifier has priority over the ABS
one, we get -|1.0| as result, instead of |1.0|.

The patch simply prevents the additional switching of NEG when ABS is set.


Nice catch. Is there a simple test case (e.g. in piglit) that exposes 
the incorrect behavior?



Signed-off-by: Ivan Kalvachev 
---
  src/gallium/drivers/r600/r600_asm.c| 9 +
  src/gallium/drivers/r600/r600_shader.c | 2 +-
  2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c
b/src/gallium/drivers/r600/r600_asm.c
index bc69806..8fc622c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,8 +635,9 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 return 0;
  }

-void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg, unsigned abs)
  {
+


Please remove the extra whitespace line.

Cheers,
Nicolai


 switch(value) {
 case 0:
 *sel = V_SQ_ALU_SRC_0;
@@ -655,11 +656,11 @@ void r600_bytecode_special_constants(uint32_t
value, unsigned *sel, unsigned *ne
 break;
 case 0xBF800000: /* -1.0f */
 *sel = V_SQ_ALU_SRC_1;
-   *neg ^= 1;
+   *neg ^= !abs;
 break;
 case 0xBF000000: /* -0.5f */
 *sel = V_SQ_ALU_SRC_0_5;
-   *neg ^= 1;
+   *neg ^= !abs;
 break;
 default:
 *sel = V_SQ_ALU_SRC_LITERAL;
@@ -1208,7 +1209,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 }
 if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 r600_bytecode_special_constants(nalu->src[i].value,
-   &nalu->src[i].sel, &nalu->src[i].neg);
+   &nalu->src[i].sel, &nalu->src[i].neg,
nalu->src[i].abs);
 }
 if (nalu->dst.sel >= bc->ngpr) {
 bc->ngpr = nalu->dst.sel + 1;
diff --git a/src/gallium/drivers/r600/r600_shader.c
b/src/gallium/drivers/r600/r600_shader.c
index 8efe902..50c0329 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1008,7 +1008,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 (tgsi_src->Register.SwizzleX ==
tgsi_src->Register.SwizzleW)) {

 index = tgsi_src->Register.Index * 4 +
tgsi_src->Register.SwizzleX;
-
r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel,
&r600_src->neg);
+
r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel,
&r600_src->neg, r600_src->abs);
 if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 return;
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] st/mesa: implement ARB_copy_image

2015-10-27 Thread Nicolai Hähnle

On 25.10.2015 18:25, Marek Olšák wrote:

+/**
+ * Handle complex format conversions using 2 blits with a temporary texture
+ * in between, e.g. blitting from B10G10R10A2 to G16R16.
+ *
+ * This example is implemented this way:
+ * 1) First, blit from B10G10R10A2 to R10G10B10A2, which is canonical, so it
+ *can be reinterpreted as a different canonical format of the same bpp,
+ *such as R16G16. This blit only swaps R and B 10-bit components.
+ * 2) Finnaly, blit the result, which is R10G10B10A2, as R16G16 to G16R16.
+ *This blit only swaps R and G 16-bit components.
+ */


Typo: Finally

Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-28 Thread Nicolai Hähnle
Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).

v2: also bypass all calls to st_finalize_texture (suggested by Marek Olšák)

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Marek Olšák 
---
 src/mesa/state_tracker/st_gen_mipmap.c | 68 ++
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/src/mesa/state_tracker/st_gen_mipmap.c 
b/src/mesa/state_tracker/st_gen_mipmap.c
index 26e1c21..b370040 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -61,6 +61,8 @@ compute_num_levels(struct gl_context *ctx,
 
numLevels = texObj->BaseLevel + baseImage->MaxNumLevels;
numLevels = MIN2(numLevels, (GLuint) texObj->MaxLevel + 1);
+   if (texObj->Immutable)
+  numLevels = MIN2(numLevels, texObj->NumLevels);
assert(numLevels >= 1);
 
return numLevels;
@@ -99,38 +101,40 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
 */
stObj->lastLevel = lastLevel;
 
-   if (pt->last_level < lastLevel) {
-  /* The current gallium texture doesn't have space for all the
-   * mipmap levels we need to generate.  So allocate a new texture.
-   */
-  struct pipe_resource *oldTex = stObj->pt;
-
-  /* create new texture with space for more levels */
-  stObj->pt = st_texture_create(st,
-oldTex->target,
-oldTex->format,
-lastLevel,
-oldTex->width0,
-oldTex->height0,
-oldTex->depth0,
-oldTex->array_size,
-0,
-oldTex->bind);
-
-  /* This will copy the old texture's base image into the new texture
-   * which we just allocated.
-   */
-  st_finalize_texture(ctx, st->pipe, texObj);
-
-  /* release the old tex (will likely be freed too) */
-  pipe_resource_reference(&oldTex, NULL);
-  st_texture_release_all_sampler_views(st, stObj);
-   }
-   else {
-  /* Make sure that the base texture image data is present in the
-   * texture buffer.
-   */
-  st_finalize_texture(ctx, st->pipe, texObj);
+   if (!texObj->Immutable) {
+  if (pt->last_level < lastLevel) {
+ /* The current gallium texture doesn't have space for all the
+ * mipmap levels we need to generate.  So allocate a new texture.
+ */
+ struct pipe_resource *oldTex = stObj->pt;
+
+ /* create new texture with space for more levels */
+ stObj->pt = st_texture_create(st,
+   oldTex->target,
+   oldTex->format,
+   lastLevel,
+   oldTex->width0,
+   oldTex->height0,
+   oldTex->depth0,
+   oldTex->array_size,
+   0,
+   oldTex->bind);
+
+ /* This will copy the old texture's base image into the new texture
+ * which we just allocated.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+
+ /* release the old tex (will likely be freed too) */
+ pipe_resource_reference(&oldTex, NULL);
+ st_texture_release_all_sampler_views(st, stObj);
+  }
+  else {
+ /* Make sure that the base texture image data is present in the
+ * texture buffer.
+ */
+ st_finalize_texture(ctx, st->pipe, texObj);
+  }
}
 
pt = stObj->pt;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3] r600g: Fix special negative immediate constants when using ABS modifier.

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 01:52, Ivan Kalvachev wrote:

-- Forwarded message --
From: Ivan Kalvachev 
Date: Wed, 28 Oct 2015 23:46:44 +0200
Subject: [PATCH v3] r600g: Fix special negative immediate constants
when using ABS modifier.
To: Nicolai Hähnle 

On 10/26/15, Nicolai Hähnle  wrote:

Hi Ivan,

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG modifier.

However some shaders define -1.0 constant and want to use it as 1.0.
They do so by using ABS modifier. But r600_bytecode_special_constants()
sets NEG in addition to ABS. Since the NEG modifier has priority over the ABS
one,
we get -|1.0| as a result, instead of |1.0|.

The patch simply prevents the additional switching of NEG when ABS is
set.


Nice catch. Is there a simple test case (e.g. in piglit) that exposes
the incorrect behavior?


Not that I know of.

I located the bug while investigating a visual problem in Nine.
https://github.com/iXit/Mesa-3D/issues/126
https://github.com/iXit/Mesa-3D/issues/127

I also heard that it fixes artifacts in "Need for Speed: Undercover"
and "Skyrim", once again, when using Nine.


I see. I guess it's not too surprising that Nine creates shaders that 
look a bit different from the Mesa statetracker's.


Reviewed-by: Nicolai Hähnle 

This should probably also go to stable.

Do you need somebody to push this for you or can you do it yourself?

Cheers,
Nicolai


Signed-off-by: Ivan Kalvachev 
---
   src/gallium/drivers/r600/r600_asm.c| 9 +
   src/gallium/drivers/r600/r600_shader.c | 2 +-
   2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c
b/src/gallium/drivers/r600/r600_asm.c
index bc69806..8fc622c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -635,8 +635,9 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode
*bc,
  return 0;
   }

-void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel,
unsigned *neg, unsigned abs)
   {
+


Please remove the extra whitespace line.

Cheers,
Nicolai



I'm attaching v3 of the patch. Same as v2, but without the extra empty line.

Best Regards



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] st/mesa: fix mipmap generation for immutable textures with incomplete pyramids

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 14:13, Marek Olšák wrote:

On Wed, Oct 28, 2015 at 1:00 PM, Nicolai Hähnle  wrote:

Without the clamping by NumLevels, the state tracker would reallocate the
texture storage (incorrect) and even fail to copy the base level image
after reallocation, leading to the graphical glitch of
https://bugs.freedesktop.org/show_bug.cgi?id=91993 .

A piglit test has been submitted for review as well (subtest of
arb_texture_storage-texture-storage).

v2: also bypass all calls to st_finalize_texture (suggested by Marek Olšák)

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Marek Olšák 


This looks good.

(a minor nit: an updated patch should not contain any reviewed-by
tags, because the updated version hadn't been seen by anybody at the
time of sending it to the list; it's okay to keep the tag now that
I've reviewed it)


Sorry about that, I'll be more careful about that in the future.

Nicolai



Marek



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] Patchwork/mesa-stable question (was: Re: [PATCH v3] r600g: Fix special negative immediate constants when using ABS modifier.)

2015-10-29 Thread Nicolai Hähnle

On 29.10.2015 10:24, Ivan Kalvachev wrote:
[snip]

On 10/29/15, Nicolai Hähnle  wrote:

On 29.10.2015 01:52, Ivan Kalvachev wrote:

On 10/26/15, Nicolai Hähnle  wrote:

On 25.10.2015 02:00, Ivan Kalvachev wrote:

Some constants (like 1.0 and 0.5) could be inlined as immediate inputs
without using their literal value. The
r600_bytecode_special_constants()
function emulates the negative of these constants by using NEG
modifier.

However some shaders define -1.0 constant and want to use it as 1.0.
They do so by using ABS modifier. But r600_bytecode_special_constants()
sets NEG in addition to ABS. Since the NEG modifier has priority over the ABS
one,
we get -|1.0| as a result, instead of |1.0|.

The patch simply prevents the additional switching of NEG when ABS is
set.


Nice catch. Is there a simple test case (e.g. in piglit) that exposes
the incorrect behavior?


Not that I know of.

I located the bug while investigating a visual problem in Nine.
https://github.com/iXit/Mesa-3D/issues/126
https://github.com/iXit/Mesa-3D/issues/127

I also heard that it fixes artifacts in "Need for Speed: Undercover"
and "Skyrim", once again, when using Nine.


I see. I guess it's not too surprising that Nine creates shaders that
look a bit different from the Mesa statetracker's.

Reviewed-by: Nicolai Hähnle 

This should probably also go to stable.

Do you need somebody to push this for you or can you do it yourself?

Cheers,
Nicolai


Yes, please.
I'm not a developer and I cannot push it myself.


I pushed the patch.

I am not familiar with patchwork yet and have a related question: on my 
push, I got the following error message related to patchwork:


remote: E: failed to find patch for rev 
f75f21a24ae2dd83507f3d4d8007f0fcfe6db802


Apparently, patchwork didn't pick up Ivan's v3 patch, perhaps because it 
wasn't inline. Is this something to worry about? Specifically, I believe 
the patch is a candidate for the stable branch, and I added the 
appropriate Cc: in the commit message. Does the message above prevent it 
from being picked up?


Sorry for the noise :/

Thanks!
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: add basic glClearBufferSubData acceleration

2015-11-05 Thread Nicolai Hähnle

On 04.11.2015 00:47, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_blit.c | 55 ++
  1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index fce014a..e934146 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -731,9 +731,64 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   const uint32_t *u32 = clear_value;
+   unsigned i;
+   bool clear_value_fits_dword = true;
+   uint8_t *map;
+
+   if (clear_value_size > 4)
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_value_fits_dword = false;
+   break;
+   }
+
+   /* Use CP DMA for the simple case. */
+   if (offset % 4 == 0 && size % 4 == 0 && clear_value_fits_dword) {
+   uint32_t value = u32[0];
+
+   switch (clear_value_size) {
+   case 1:
+   value &= 0xff;
+   value |= (value << 8) | (value << 16) | (value << 24);
+   break;
+   case 2:
+   value &= 0xffff;
+   value |= value << 16;
+   break;
+   }


To reduce the chance of complaints by valgrind et al:

switch (clear_value_size) {
case 1:
value = *(uint8_t *)u32;
value |= (value << 8) | (value << 16) | (value << 24);
break;
case 2:
value = *(uint16_t *)u32;
value |= value << 16;
break;
default:
value = *u32;
break;
}

Cheers,
Nicolai


+
+   sctx->b.clear_buffer(ctx, dst, offset, size, value, false);
+   return;
+   }
+
+   /* TODO: use a compute shader for other cases. */
+
+   /* Software fallback. */
+   map = r600_buffer_map_sync_with_rings(&sctx->b, r600_resource(dst),
+ PIPE_TRANSFER_WRITE);
+   if (!map)
+   return;
+
+   map += offset;
+   size /= clear_value_size;
+   for (i = 0; i < size; i++) {
+   memcpy(map, clear_value, clear_value_size);
+   map += clear_value_size;
+   }
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/6] radeonsi: add glClearBufferSubData acceleration

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:44, Marek Olšák wrote:

From: Marek Olšák 

Unaligned 8-bit and 16-bit clears are done in software.


I found this confusing at first. I think a better phrasing is something 
along the lines of:


8-bit and 16-bit clears which are not aligned to dwords are done in 
software.


With this, the whole series is

Reviewed-by: Nicolai Hähnle 


---
  src/gallium/drivers/radeonsi/si_blit.c | 60 ++
  1 file changed, 60 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index d320ac4..31f22c4 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -737,9 +737,69 @@ static void si_flush_resource(struct pipe_context *ctx,
}
  }

+static void si_pipe_clear_buffer(struct pipe_context *ctx,
+struct pipe_resource *dst,
+unsigned offset, unsigned size,
+const void *clear_value_ptr,
+int clear_value_size)
+{
+   struct si_context *sctx = (struct si_context*)ctx;
+   uint32_t dword_value;
+   unsigned i;
+
+   assert(offset % clear_value_size == 0);
+   assert(size % clear_value_size == 0);
+
+   if (clear_value_size > 4) {
+   const uint32_t *u32 = clear_value_ptr;
+   bool clear_dword_duplicated = true;
+
+   /* See if we can lower large fills to dword fills. */
+   for (i = 1; i < clear_value_size / 4; i++)
+   if (u32[0] != u32[i]) {
+   clear_dword_duplicated = false;
+   break;
+   }
+
+   if (!clear_dword_duplicated) {
+   /* Use transform feedback for 64-bit, 96-bit, and
+* 128-bit fills.
+*/
+   union pipe_color_union clear_value;
+
+   memcpy(&clear_value, clear_value_ptr, clear_value_size);
+   si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
+   util_blitter_clear_buffer(sctx->blitter, dst, offset,
+ size, clear_value_size / 4,
+ &clear_value);
+   si_blitter_end(ctx);
+   return;
+   }
+   }
+
+   /* Expand the clear value to a dword. */
+   switch (clear_value_size) {
+   case 1:
+   dword_value = *(uint8_t*)clear_value_ptr;
+   dword_value |= (dword_value << 8) |
+  (dword_value << 16) |
+  (dword_value << 24);
+   break;
+   case 2:
+   dword_value = *(uint16_t*)clear_value_ptr;
+   dword_value |= dword_value << 16;
+   break;
+   default:
+   dword_value = *(uint32_t*)clear_value_ptr;
+   }
+
+   sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+}
+
  void si_init_blit_functions(struct si_context *sctx)
  {
sctx->b.b.clear = si_clear;
+   sctx->b.b.clear_buffer = si_pipe_clear_buffer;
sctx->b.b.clear_render_target = si_clear_render_target;
sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
sctx->b.b.resource_copy_region = si_resource_copy_region;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] gallium/radeon: inline the r600_rings structure

2015-11-09 Thread Nicolai Hähnle

The series is

Reviewed-by: Nicolai Hähnle 

On 08.11.2015 22:45, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/r600/evergreen_compute.c| 14 ++---
  src/gallium/drivers/r600/evergreen_hw_context.c | 10 ++--
  src/gallium/drivers/r600/evergreen_state.c  | 66 
  src/gallium/drivers/r600/r600_blit.c|  2 +-
  src/gallium/drivers/r600/r600_hw_context.c  | 34 ++---
  src/gallium/drivers/r600/r600_pipe.c| 10 ++--
  src/gallium/drivers/r600/r600_state.c   | 68 -
  src/gallium/drivers/r600/r600_state_common.c| 36 ++---
  src/gallium/drivers/radeon/r600_buffer_common.c | 32 ++--
  src/gallium/drivers/radeon/r600_pipe_common.c   | 34 ++---
  src/gallium/drivers/radeon/r600_pipe_common.h   |  8 +--
  src/gallium/drivers/radeon/r600_query.c | 16 +++---
  src/gallium/drivers/radeon/r600_streamout.c | 18 +++
  src/gallium/drivers/radeonsi/cik_sdma.c | 14 ++---
  src/gallium/drivers/radeonsi/si_compute.c   | 12 ++---
  src/gallium/drivers/radeonsi/si_cp_dma.c| 10 ++--
  src/gallium/drivers/radeonsi/si_descriptors.c   | 38 +++---
  src/gallium/drivers/radeonsi/si_dma.c   | 14 ++---
  src/gallium/drivers/radeonsi/si_hw_context.c| 16 +++---
  src/gallium/drivers/radeonsi/si_pipe.c  |  8 +--
  src/gallium/drivers/radeonsi/si_pm4.c   |  6 +--
  src/gallium/drivers/radeonsi/si_state.c | 34 ++---
  src/gallium/drivers/radeonsi/si_state_draw.c| 24 -
  src/gallium/drivers/radeonsi/si_state_shaders.c |  4 +-
  24 files changed, 262 insertions(+), 266 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
b/src/gallium/drivers/r600/evergreen_compute.c
index 6f2b7ba..5743e3f 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -346,7 +346,7 @@ static void evergreen_emit_direct_dispatch(
const uint *block_layout, const uint *grid_layout)
  {
int i;
-   struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
struct r600_pipe_compute *shader = rctx->cs_shader_state.shader;
unsigned num_waves;
unsigned num_pipes = rctx->screen->b.info.r600_max_pipes;
@@ -417,12 +417,12 @@ static void evergreen_emit_direct_dispatch(
  static void compute_emit_cs(struct r600_context *ctx, const uint 
*block_layout,
const uint *grid_layout)
  {
-   struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
unsigned i;

/* make sure that the gfx ring is only one active */
-   if (ctx->b.rings.dma.cs && ctx->b.rings.dma.cs->cdw) {
-   ctx->b.rings.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+   if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
+   ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
}

/* Initialize all the compute-related registers.
@@ -439,7 +439,7 @@ static void compute_emit_cs(struct r600_context *ctx, const 
uint *block_layout,
/* XXX support more than 8 colorbuffers (the offsets are not a multiple 
of 0x3C for CB8-11) */
for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
struct r600_surface *cb = (struct 
r600_surface*)ctx->framebuffer.state.cbufs[i];
-   unsigned reloc = radeon_add_to_buffer_list(&ctx->b, 
&ctx->b.rings.gfx,
+   unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
   (struct 
r600_resource*)cb->base.texture,
   RADEON_USAGE_READWRITE,
   
RADEON_PRIO_SHADER_RW_BUFFER);
@@ -538,7 +538,7 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
-   struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
+   struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
@@ -564,7 +564,7 @@ void evergreen_emit_cs_shader(
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */

radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
-   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.rings.gfx,
+   radeon_emit(cs, radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx,
  code_bo, RADEON_USAGE_READ,
  RADEON_PRIO_USER_SHADER));
  }
diff --git a/src/galliu

Re: [Mesa-dev] [PATCH 4/7] gallium/radeon: simplify restoring render condition after flush

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeon/r600_pipe_common.c | 22 +-
  src/gallium/drivers/radeon/r600_pipe_common.h |  4 
  2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8739914..224da11 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c

[snip]

@@ -173,12 +162,11 @@ void r600_postflush_resume_features(struct 
r600_common_context *ctx)
r600_resume_timer_queries(ctx);
}

-   /* Re-enable render condition. */
-   if (ctx->saved_render_cond) {
-   ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
- ctx->saved_render_cond_cond,
- ctx->saved_render_cond_mode);
-   }
+   /* Just re-emit PKT3_SET_PREDICATION. */
+   if (ctx->current_render_cond)
+   ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
+   ctx->current_render_cond_cond,
+   ctx->current_render_cond_mode);


Drop the "Just" from the comment, because while it makes sense in the 
context of the diff, it does not make sense when only looking at the new 
code.


Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/7] gallium/radeon: simplify restoring render condition after flush

2015-11-09 Thread Nicolai Hähnle

On 09.11.2015 10:43, Nicolai Hähnle wrote:

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeon/r600_pipe_common.c | 22
+-
  src/gallium/drivers/radeon/r600_pipe_common.h |  4 
  2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8739914..224da11 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c

[snip]

@@ -173,12 +162,11 @@ void r600_postflush_resume_features(struct
r600_common_context *ctx)
  r600_resume_timer_queries(ctx);
  }

-/* Re-enable render condition. */
-if (ctx->saved_render_cond) {
-ctx->b.render_condition(&ctx->b, ctx->saved_render_cond,
-  ctx->saved_render_cond_cond,
-  ctx->saved_render_cond_mode);
-}
+/* Just re-emit PKT3_SET_PREDICATION. */
+if (ctx->current_render_cond)
+ctx->b.render_condition(&ctx->b, ctx->current_render_cond,
+ctx->current_render_cond_cond,
+ctx->current_render_cond_mode);


Drop the "Just" from the comment, because while it makes sense in the
context of the diff, it does not make sense when only looking at the new
code.


Never mind, this becomes moot anyway with patch 5.



Nicolai


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 6/7] gallium/radeon: remove predicate_drawing flag

2015-11-09 Thread Nicolai Hähnle

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/r600/r600_hw_context.c| 2 +-
  src/gallium/drivers/r600/r600_state_common.c  | 2 +-
  src/gallium/drivers/radeon/r600_pipe_common.h | 1 -
  src/gallium/drivers/radeon/r600_query.c   | 1 -
  src/gallium/drivers/radeonsi/si_state_draw.c  | 2 +-
  5 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 44e7cf2..2383175 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -81,7 +81,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
}

/* Count in render_condition(NULL) at the end of CS. */
-   if (ctx->b.predicate_drawing) {
+   if (ctx->b.current_render_cond) {
num_dw += 3;
}


Aside: What is this actually trying to count? The way I understand 
conditional drawing now, there is no reset state that we have to emit at 
the end of the CS (other than for the queries themselves, but AFAIU that 
is already counted by num_cs_dw_nontimer_queries_suspend above). Perhaps 
this block could actually be dropped entirely?


Cheers,
Nicolai



diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 28aedff..5cf5208 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-   bool render_cond_bit = rctx->b.predicate_drawing && 
!rctx->b.render_cond_force_off;
+   bool render_cond_bit = rctx->b.current_render_cond && 
!rctx->b.render_cond_force_off;
uint64_t mask;

if (!info.indirect && !info.count && (info.indexed || 
!info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 09465ae..ba9000f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -421,7 +421,6 @@ struct r600_common_context {
struct pipe_query   *current_render_cond;
unsignedcurrent_render_cond_mode;
boolean current_render_cond_cond;
-   boolpredicate_drawing;
boolrender_cond_force_off; /* for u_blitter 
*/

/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 145b629..9f92587 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -834,7 +834,6 @@ static void r600_render_condition(struct pipe_context *ctx,
rctx->current_render_cond = query;
rctx->current_render_cond_cond = condition;
rctx->current_render_cond_mode = mode;
-   rctx->predicate_drawing = query != NULL;

/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index ebc01e8..79e8876 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -457,7 +457,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
  {
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned sh_base_reg = 
sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];
-   bool render_cond_bit = sctx->b.predicate_drawing && 
!sctx->b.render_cond_force_off;
+   bool render_cond_bit = sctx->b.current_render_cond && 
!sctx->b.render_cond_force_off;

if (info->count_from_stream_output) {
struct r600_so_target *t =



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 7/7] gallium/radeon: shorten render_cond variable names

2015-11-09 Thread Nicolai Hähnle

The series is

Reviewed-by: Nicolai Hähnle 

On 08.11.2015 22:48, Marek Olšák wrote:

From: Marek Olšák 

and ..._cond -> ..._invert
---
  src/gallium/drivers/r600/r600_hw_context.c|  2 +-
  src/gallium/drivers/r600/r600_state_common.c  |  2 +-
  src/gallium/drivers/radeon/r600_pipe_common.h |  6 +++---
  src/gallium/drivers/radeon/r600_query.c   | 14 +++---
  src/gallium/drivers/radeon/r600_texture.c |  2 +-
  src/gallium/drivers/radeonsi/si_state_draw.c  |  2 +-
  6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_hw_context.c 
b/src/gallium/drivers/r600/r600_hw_context.c
index 2383175..917808a 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -81,7 +81,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned 
num_dw,
}

/* Count in render_condition(NULL) at the end of CS. */
-   if (ctx->b.current_render_cond) {
+   if (ctx->b.render_cond) {
num_dw += 3;
}

diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 5cf5208..d629194 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1478,7 +1478,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
struct pipe_draw_info info = *dinfo;
struct pipe_index_buffer ib = {};
struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
-   bool render_cond_bit = rctx->b.current_render_cond && 
!rctx->b.render_cond_force_off;
+   bool render_cond_bit = rctx->b.render_cond && 
!rctx->b.render_cond_force_off;
uint64_t mask;

if (!info.indirect && !info.count && (info.indexed || 
!info.count_from_stream_output)) {
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index ba9000f..ebe633b 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -418,9 +418,9 @@ struct r600_common_context {

/* Render condition. */
struct r600_atomrender_cond_atom;
-   struct pipe_query   *current_render_cond;
-   unsignedcurrent_render_cond_mode;
-   boolean current_render_cond_cond;
+   struct pipe_query   *render_cond;
+   unsignedrender_cond_mode;
+   boolean render_cond_invert;
boolrender_cond_force_off; /* for u_blitter 
*/

/* MSAA sample locations.
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 9f92587..8c2b601 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,7 +307,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
struct r600_atom *atom)
  {
struct radeon_winsys_cs *cs = ctx->gfx.cs;
-   struct r600_query *query = (struct r600_query*)ctx->current_render_cond;
+   struct r600_query *query = (struct r600_query*)ctx->render_cond;
struct r600_query_buffer *qbuf;
uint32_t op;
bool flag_wait;
@@ -315,8 +315,8 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
if (!query)
return;

-   flag_wait = ctx->current_render_cond_mode == PIPE_RENDER_COND_WAIT ||
-   ctx->current_render_cond_mode == 
PIPE_RENDER_COND_BY_REGION_WAIT;
+   flag_wait = ctx->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+   ctx->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT;

switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -335,7 +335,7 @@ static void r600_emit_query_predication(struct 
r600_common_context *ctx,
}

/* if true then invert, see GL_ARB_conditional_render_inverted */
-   if (ctx->current_render_cond_cond)
+   if (ctx->render_cond_invert)
op |= PREDICATION_DRAW_NOT_VISIBLE; /* Draw if not 
visable/overflow */
else
op |= PREDICATION_DRAW_VISIBLE; /* Draw if visable/overflow */
@@ -831,9 +831,9 @@ static void r600_render_condition(struct pipe_context *ctx,
struct r600_query_buffer *qbuf;
struct r600_atom *atom = &rctx->render_cond_atom;

-   rctx->current_render_cond = query;
-   rctx->current_render_cond_cond = condition;
-   rctx->current_render_cond_mode = mode;
+   rctx->render_cond = query;
+   rctx->render_cond_invert = condition;
+   rctx->render_cond_mode = mode;

/* Compute the size of SET_PREDICATION packets. */
atom->num_dw = 0;
diff --git a/sr

Re: [Mesa-dev] [PATCH 5/5] radeonsi: calculate optimal GS ring sizes to fix GS hangs on Tonga

2015-11-09 Thread Nicolai Hähnle
sctx)
  {
-   unsigned esgs_ring_size = 128 * 1024;
-   unsigned gsvs_ring_size = 60 * 1024 * 1024;
+   struct si_shader_selector *es =
+   sctx->tes_shader.cso ? sctx->tes_shader.cso : 
sctx->vs_shader.cso;
+   struct si_shader_selector *gs = sctx->gs_shader.cso;
+   struct si_pm4_state *pm4;

-   assert(!sctx->esgs_ring && !sctx->gsvs_ring);
+   /* Chip constants. */
+   unsigned num_se = sctx->screen->b.info.max_se;
+   unsigned wave_size = 64;
+   unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+   unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register 
(per SE) */
+   unsigned alignment = 256 * num_se;
+   /* The maximum size is 63.999 MB per SE. */
+   unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
+
+   /* Calculate the minimum size. */
+   unsigned min_esgs_ring_size = align(es->esgs_itemsize * gs_vertex_reuse 
*
+   wave_size, alignment);
+
+   /* These are recommended sizes, not minimum sizes. */
+   unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
+ es->esgs_itemsize * 
gs->gs_input_verts_per_prim;
+   unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
+ gs->max_gsvs_emit_size * (gs->max_gs_stream + 
1);
+
+   min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+   esgs_ring_size = align(esgs_ring_size, alignment);
+   gsvs_ring_size = align(gsvs_ring_size, alignment);
+
+   esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+   gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
+
+   /* Some rings don't have to be allocated if shaders don't use them.
+* (e.g. no varyings between ES and GS or GS and PS)
+*/
+   bool update_esgs = esgs_ring_size &&
+  (!sctx->esgs_ring ||
+   sctx->esgs_ring->width0 < esgs_ring_size);
+   bool update_gsvs = gsvs_ring_size &&
+  (!sctx->gsvs_ring ||
+   sctx->gsvs_ring->width0 < gsvs_ring_size);


I take it the comment above should be "or GS and _VS_".

With this, the series is

Reviewed-by: Nicolai Hähnle 



-   sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-  PIPE_USAGE_DEFAULT, esgs_ring_size);
-   if (!sctx->esgs_ring)
-   return;
+   if (!update_esgs && !update_gsvs)
+   return true;

-   sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-PIPE_USAGE_DEFAULT, 
gsvs_ring_size);
-   if (!sctx->gsvs_ring) {
+   if (update_esgs) {
pipe_resource_reference(&sctx->esgs_ring, NULL);
-   return;
+   sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, 
PIPE_BIND_CUSTOM,
+PIPE_USAGE_DEFAULT,
+esgs_ring_size);
+   if (!sctx->esgs_ring)
+   return false;
}

-   si_init_config_add_vgt_flush(sctx);
+   if (update_gsvs) {
+   pipe_resource_reference(&sctx->gsvs_ring, NULL);
+   sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, 
PIPE_BIND_CUSTOM,
+PIPE_USAGE_DEFAULT,
+gsvs_ring_size);
+   if (!sctx->gsvs_ring)
+   return false;
+   }
+
+   /* Create the "init_config_gs_rings" state. */
+   pm4 = CALLOC_STRUCT(si_pm4_state);
+   if (!pm4)
+   return false;

-   /* Append these registers to the init config state. */
if (sctx->b.chip_class >= CIK) {
-   if (sctx->b.chip_class >= VI) {
-   /* The maximum sizes are 63.999 MB on VI, because
-* the register fields only have 18 bits. */
-   assert(esgs_ring_size / 256 < (1 << 18));
-   assert(gsvs_ring_size / 256 < (1 << 18));
-   }
-   si_pm4_set_reg(sctx->init_config, R_030900_VGT_ESGS_RING_SIZE,
-  esgs_ring_size / 256);
-   si_pm4_set_reg(sctx->init_config, R_030904_VGT_GSVS_RING_SIZE,
-  gsvs_ring_size / 256);
+   if (sctx->esgs_ring)
+   si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
+  sctx->esgs_ring->width0 / 256);
+   if (sctx->gsvs_ring)

[Mesa-dev] [PATCH 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
  if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
 default:
   

[Mesa-dev] [PATCH 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry points.

The radeon-specific part that actually makes use of this feature is not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
 gallium/auxiliary/hud/hud_context.c   |   24 ++
 gallium/auxiliary/hud/hud_driver_query.c  |  249 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |3 
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  247 -
 mesa/state_tracker/st_cb_perfmon.h|   32 +++
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 248 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 240 insertions(+), 45 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..abc9f54 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,149 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include 
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pending) {
+  unsigned idx = (bq->head - 

[Mesa-dev] [PATCH 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved, especially when they get large.
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..6c71a13 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,29 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+&stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +92,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(&cntr->list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +128,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +138,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +156,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
/* Stop the query for each active counter. */
-   LIST_FOR

[Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  |  3 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..f122c74 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -115,6 +115,9 @@ struct pipe_context {
struct pipe_query *(*create_query)( struct pipe_context *pipe,
unsigned query_type,
unsigned index );
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
 
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
---
 src/mesa/state_tracker/st_cb_perfmon.c | 75 ++
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 6c71a13..078d2c4 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
stm->active_counters = CALLOC(num_active_counters,
@@ -68,6 +74,9 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters)
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -79,13 +88,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
 &stm->active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -102,6 +133,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -140,9 +178,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -161,8 +203,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -196,11 +242,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(pipe, query, FALSE, &result)) {
+  if (query && !pipe->get_query_result(pipe, query, 

[Mesa-dev] [PATCH 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the AMD_performance_monitor spec. The spec says:

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/10] radeon: split hw query buffer handling from cs emit

2015-11-13 Thread Nicolai Hähnle
The idea here is that driver queries implemented outside of common code
will use the same query buffer handling with different logic for starting
and stopping the corresponding counters.
---
 src/gallium/drivers/radeon/r600_query.c | 198 +++-
 src/gallium/drivers/radeon/r600_query.h |  20 
 2 files changed, 135 insertions(+), 83 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index eb2a563..4b201fd 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -229,21 +229,10 @@ void r600_query_hw_destroy(struct r600_common_context 
*rctx,
FREE(rquery);
 }
 
-static bool r600_is_timer_query(unsigned type)
+static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx,
+  struct r600_query_hw *query)
 {
-   return type == PIPE_QUERY_TIME_ELAPSED ||
-  type == PIPE_QUERY_TIMESTAMP;
-}
-
-static bool r600_query_needs_begin(unsigned type)
-{
-   return type != PIPE_QUERY_TIMESTAMP;
-}
-
-static struct r600_resource *r600_new_query_buffer(struct r600_common_context 
*ctx, unsigned type)
-{
-   unsigned j, i, num_results, buf_size = 4096;
-   uint32_t *results;
+   unsigned buf_size = 4096;
 
/* Queries are normally read by the CPU after
 * being written by the gpu, hence staging is probably a good
@@ -253,14 +242,34 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
pipe_buffer_create(ctx->b.screen, PIPE_BIND_CUSTOM,
   PIPE_USAGE_STAGING, buf_size);
 
-   switch (type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
-   memset(results, 0, buf_size);
+   if (query->ops->prepare_buffer)
+   query->ops->prepare_buffer(ctx, query, buf);
+
+   return buf;
+}
+
+static void r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+struct r600_resource *buffer)
+ {
+   uint32_t *results;
+
+   if (query->b.type == PIPE_QUERY_TIME_ELAPSED ||
+   query->b.type == PIPE_QUERY_TIMESTAMP)
+   return;
+
+   results = r600_buffer_map_sync_with_rings(ctx, buffer,
+ PIPE_TRANSFER_WRITE);
+
+   memset(results, 0, buffer->b.b.width0);
+
+   if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
+   query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
+   unsigned num_results;
+   unsigned i, j;
 
/* Set top bits for unused backends. */
-   num_results = buf_size / (16 * ctx->max_db);
+   num_results = buffer->b.b.width0 / (16 * ctx->max_db);
for (j = 0; j < num_results; j++) {
for (i = 0; i < ctx->max_db; i++) {
if (!(ctx->backend_mask & (1

[Mesa-dev] [PATCH 09/10] radeon: expose r600_query_hw functions for reuse

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 30 +-
 src/gallium/drivers/radeon/r600_query.h | 10 ++
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 59e2a58..4f89634 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -282,11 +282,6 @@ static void r600_query_hw_prepare_buffer(struct 
r600_common_context *ctx,
}
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
-static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_query_hw_get_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
 static void r600_do_render_condition(struct r600_common_context *,
 struct r600_query *, boolean condition,
 uint mode);
@@ -321,6 +316,16 @@ static struct r600_query_hw_ops query_hw_default_hw_ops = {
.add_result = r600_query_hw_add_result,
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query)
+{
+   query->buffer.buf = r600_new_query_buffer(rctx, query);
+   if (!query->buffer.buf)
+   return FALSE;
+
+   return TRUE;
+}
+
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
   unsigned query_type,
   unsigned index)
@@ -370,8 +375,7 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
return NULL;
}
 
-   query->buffer.buf = r600_new_query_buffer(rctx, query);
-   if (!query->buffer.buf) {
+   if (!r600_query_hw_init(rctx, query)) {
FREE(query);
return NULL;
}
@@ -645,8 +649,8 @@ static boolean r600_begin_query(struct pipe_context *ctx,
return rquery->ops->begin(rctx, rquery);
 }
 
-static boolean r600_query_hw_begin(struct r600_common_context *rctx,
-  struct r600_query *rquery)
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *prev = query->buffer.previous;
@@ -691,7 +695,7 @@ static void r600_end_query(struct pipe_context *ctx, struct 
pipe_query *query)
rquery->ops->end(rctx, rquery);
 }
 
-static void r600_query_hw_end(struct r600_common_context *rctx,
+void r600_query_hw_end(struct r600_common_context *rctx,
  struct r600_query *rquery)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
@@ -858,9 +862,9 @@ static void r600_query_hw_clear_result(struct r600_query_hw 
*query,
util_query_clear_result(result, query->b.type);
 }
 
-static boolean r600_query_hw_get_result(struct r600_common_context *rctx,
-   struct r600_query *rquery,
-   boolean wait, union pipe_query_result 
*result)
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait, union pipe_query_result *result)
 {
struct r600_query_hw *query = (struct r600_query_hw *)rquery;
struct r600_query_buffer *qbuf;
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
index 17a9da3..4e357f5 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -122,7 +122,17 @@ struct r600_query_hw {
unsigned stream;
 };
 
+boolean r600_query_hw_init(struct r600_common_context *rctx,
+  struct r600_query_hw *query);
 void r600_query_hw_destroy(struct r600_common_context *rctx,
   struct r600_query *rquery);
+boolean r600_query_hw_begin(struct r600_common_context *rctx,
+   struct r600_query *rquery);
+void r600_query_hw_end(struct r600_common_context *rctx,
+  struct r600_query *rquery);
+boolean r600_query_hw_get_result(struct r600_common_context *rctx,
+struct r600_query *rquery,
+boolean wait,
+union pipe_query_result *result);
 
 #endif /* R600_QUERY_H */
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/10] radeon: cleanup driver query list

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_query.c | 84 +
 1 file changed, 55 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 8aa8774..60381b2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,39 +1017,50 @@ err:
return;
 }
 
+#define X(name_, query_type_, type_, result_type_) \
+   { \
+   .name = name_, \
+   .query_type = R600_QUERY_##query_type_, \
+   .type = PIPE_DRIVER_QUERY_TYPE_##type_, \
+   .result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_##result_type_, \
+   .group_id = ~(unsigned)0 \
+   }
+
+static struct pipe_driver_query_info r600_driver_query_list[] = {
+   X("num-compilations",   NUM_COMPILATIONS,   UINT64, 
CUMULATIVE),
+   X("num-shaders-created",NUM_SHADERS_CREATED,UINT64, 
CUMULATIVE),
+   X("draw-calls", DRAW_CALLS, UINT64, 
CUMULATIVE),
+   X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
+   X("requested-GTT",  REQUESTED_GTT,  BYTES, AVERAGE),
+   X("buffer-wait-time",   BUFFER_WAIT_TIME,   MICROSECONDS, 
CUMULATIVE),
+   X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, 
CUMULATIVE),
+   X("num-bytes-moved",NUM_BYTES_MOVED,BYTES, 
CUMULATIVE),
+   X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
+   X("GTT-usage",  GTT_USAGE,  BYTES, AVERAGE),
+   X("GPU-load",   GPU_LOAD,   UINT64, 
AVERAGE),
+   X("temperature",GPU_TEMPERATURE,UINT64, 
AVERAGE),
+   X("shader-clock",   CURRENT_GPU_SCLK,   HZ, AVERAGE),
+   X("memory-clock",   CURRENT_GPU_MCLK,   HZ, AVERAGE),
+};
+
+#undef X
+
+static unsigned r600_get_num_queries(struct r600_common_screen *rscreen)
+{
+   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
+   return Elements(r600_driver_query_list);
+   else if (rscreen->info.drm_major == 3)
+   return Elements(r600_driver_query_list) - 3;
+   else
+   return Elements(r600_driver_query_list) - 4;
+}
+
 static int r600_get_driver_query_info(struct pipe_screen *screen,
  unsigned index,
  struct pipe_driver_query_info *info)
 {
struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
+   unsigned num_queries = r600_get_num_queries(rscreen);
 
if (!info)
return num_queries;
@@ -1057,7 +1068,22 @@ static int r600_get_driver_query_info(struct pipe_screen 
*screen,
if (index >= num_queries)
return 0;
 
-   *info = list[index];
+   *info = r600_driver_query_list[index];
+
+   switch (info->que

[Mesa-dev] [PATCH 03/10] radeon: move R600_QUERY_* constants into a new query header file

2015-11-13 Thread Nicolai Hähnle
More query-related structures will have to be moved into their own
header file to support hardware-specific performance counters.
---
 src/gallium/drivers/radeon/Makefile.sources   |  1 +
 src/gallium/drivers/radeon/r600_pipe_common.h | 15 
 src/gallium/drivers/radeon/r600_query.c   |  1 +
 src/gallium/drivers/radeon/r600_query.h   | 49 +++
 4 files changed, 51 insertions(+), 15 deletions(-)
 create mode 100644 src/gallium/drivers/radeon/r600_query.h

diff --git a/src/gallium/drivers/radeon/Makefile.sources 
b/src/gallium/drivers/radeon/Makefile.sources
index f63790c..d840ff8 100644
--- a/src/gallium/drivers/radeon/Makefile.sources
+++ b/src/gallium/drivers/radeon/Makefile.sources
@@ -7,6 +7,7 @@ C_SOURCES := \
r600_pipe_common.c \
r600_pipe_common.h \
r600_query.c \
+   r600_query.h \
r600_streamout.c \
r600_texture.c \
radeon_uvd.c \
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d2c54f3..419f785 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -47,21 +47,6 @@
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV << 
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
-#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
-#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
-#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
-#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
-#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
-#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
-#define R600_QUERY_VRAM_USAGE  (PIPE_QUERY_DRIVER_SPECIFIC + 6)
-#define R600_QUERY_GTT_USAGE   (PIPE_QUERY_DRIVER_SPECIFIC + 7)
-#define R600_QUERY_GPU_TEMPERATURE (PIPE_QUERY_DRIVER_SPECIFIC + 8)
-#define R600_QUERY_CURRENT_GPU_SCLK(PIPE_QUERY_DRIVER_SPECIFIC + 9)
-#define R600_QUERY_CURRENT_GPU_MCLK(PIPE_QUERY_DRIVER_SPECIFIC + 10)
-#define R600_QUERY_GPU_LOAD(PIPE_QUERY_DRIVER_SPECIFIC + 11)
-#define R600_QUERY_NUM_COMPILATIONS(PIPE_QUERY_DRIVER_SPECIFIC + 12)
-#define R600_QUERY_NUM_SHADERS_CREATED (PIPE_QUERY_DRIVER_SPECIFIC + 13)
-
 #define R600_CONTEXT_STREAMOUT_FLUSH   (1u << 0)
 #define R600_CONTEXT_PRIVATE_FLAG  (1u << 1)
 
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 60381b2..b79d2d0 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
diff --git a/src/gallium/drivers/radeon/r600_query.h 
b/src/gallium/drivers/radeon/r600_query.h
new file mode 100644
index 000..fc8b47b
--- /dev/null
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *  Nicolai Hähnle 
+ *
+ */
+
+#ifndef R600_QUERY_H
+#define R600_QUERY_H
+
+#include "pipe/p_defines.h"
+
+#define R600_QUERY_DRAW_CALLS  (PIPE_QUERY_DRIVER_SPECIFIC + 0)
+#define R600_QUERY_REQUESTED_VRAM  (PIPE_QUERY_DRIVER_SPECIFIC + 1)
+#define R600_QUERY_REQUESTED_GTT   (PIPE_QUERY_DRIVER_SPECIFIC + 2)
+#define R600_QUERY_BUFFER_WAIT_TIME(PIPE_QUERY_DRIVER_SPECIFIC + 3)
+#define R600_QUERY_NUM_CS_FLUSHES  (PIPE_QUERY_DRIVER_SPECIFIC + 4)
+#define R600_QUERY_NUM_BYTES_MOVED (PIPE_QUERY_DRIVER_SPECIFIC + 5)
+#define R600_QUERY_VRAM_USAGE  (PIPE_QUERY_DRIVER_S

[Mesa-dev] [PATCH 04/10] radeon: add query handler function pointers

2015-11-13 Thread Nicolai Hähnle
The goal here is to be able to move the implementation details of
hardware-specific queries (in particular, performance counters) out of the
common code.
---
 src/gallium/drivers/radeon/r600_query.c | 73 +
 src/gallium/drivers/radeon/r600_query.h | 16 
 2 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index b79d2d0..fdab8e3 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,7 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-
 struct r600_query_buffer {
/* The buffer where query results are stored. */
struct r600_resource*buf;
@@ -39,6 +38,8 @@ struct r600_query_buffer {
 };
 
 struct r600_query {
+   struct r600_query_ops *ops;
+
/* The query buffer and how many results are in it. */
struct r600_query_bufferbuffer;
/* The type of query */
@@ -59,6 +60,23 @@ struct r600_query {
unsigned stream;
 };
 
+static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
+static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
+static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_do_get_query_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops legacy_query_ops = {
+   .destroy = r600_do_destroy_query,
+   .begin = r600_do_begin_query,
+   .end = r600_do_end_query,
+   .get_result = r600_do_get_query_result,
+   .render_condition = r600_do_render_condition,
+};
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -366,6 +384,7 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
return NULL;
 
query->type = query_type;
+   query->ops = &legacy_query_ops;
 
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -373,7 +392,6 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
query->result_size = 16 * rctx->max_db;
query->num_cs_dw = 6;
break;
-   break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
query->num_cs_dw = 8;
@@ -433,7 +451,15 @@ static struct pipe_query *r600_create_query(struct 
pipe_context *ctx, unsigned q
 
 static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query 
*query)
 {
-   struct r600_query *rquery = (struct r600_query*)query;
+   struct r600_common_context *rctx = (struct r600_common_context *)ctx;
+   struct r600_query *rquery = (struct r600_query *)query;
+
+   rquery->ops->destroy(rctx, rquery);
+}
+
+static void r600_do_destroy_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
/* Release all query buffers. */
@@ -445,7 +471,7 @@ static void r600_destroy_query(struct pipe_context *ctx, 
struct pipe_query *quer
}
 
pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, 
NULL);
-   FREE(query);
+   FREE(rquery);
 }
 
 static boolean r600_begin_query(struct pipe_context *ctx,
@@ -453,6 +479,13 @@ static boolean r600_begin_query(struct pipe_context *ctx,
 {
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
+
+   return rquery->ops->begin(rctx, rquery);
+}
+
+static boolean r600_do_begin_query(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
struct r600_query_buffer *prev = rquery->buffer.previous;
 
if (!r600_query_needs_begin(rquery->type)) {
@@ -528,6 +561,12 @@ static void r600_end_query(struct pipe_context *ctx, 
struct pipe_query *query)
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
struct r600_query *rquery = (struct r600_query *)query;
 
+   rquery->ops->end(rctx, rquery);
+}
+
+static void r600_do_end_query(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
/* Non-GPU queries. */
switch (rquery->type) {
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -792,11 +831,19 @@ static boolean r600_get_query_buffer_result(struct 
r600_common_context *ctx,
 }
 
 static boolean r600_get_query_result(struct pipe_context *ctx,
-  

[Mesa-dev] [PATCH 01/10] radeon: move get_driver_query_info to r600_query.c

2015-11-13 Thread Nicolai Hähnle
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 46 +
 src/gallium/drivers/radeon/r600_pipe_common.h |  1 +
 src/gallium/drivers/radeon/r600_query.c   | 49 +++
 3 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 79e624e..41acfbc 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -737,50 +737,6 @@ static uint64_t r600_get_timestamp(struct pipe_screen 
*screen)
rscreen->info.r600_clock_crystal_freq;
 }
 
-static int r600_get_driver_query_info(struct pipe_screen *screen,
- unsigned index,
- struct pipe_driver_query_info *info)
-{
-   struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-   struct pipe_driver_query_info list[] = {
-   {"num-compilations", R600_QUERY_NUM_COMPILATIONS, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-shaders-created", R600_QUERY_NUM_SHADERS_CREATED, {0}, 
PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"draw-calls", R600_QUERY_DRAW_CALLS, {0}},
-   {"requested-VRAM", R600_QUERY_REQUESTED_VRAM, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"requested-GTT", R600_QUERY_REQUESTED_GTT, 
{rscreen->info.gart_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"buffer-wait-time", R600_QUERY_BUFFER_WAIT_TIME, {0}, 
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"num-cs-flushes", R600_QUERY_NUM_CS_FLUSHES, {0}},
-   {"num-bytes-moved", R600_QUERY_NUM_BYTES_MOVED, {0}, 
PIPE_DRIVER_QUERY_TYPE_BYTES,
-PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE},
-   {"VRAM-usage", R600_QUERY_VRAM_USAGE, 
{rscreen->info.vram_size}, PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GTT-usage", R600_QUERY_GTT_USAGE, {rscreen->info.gart_size}, 
PIPE_DRIVER_QUERY_TYPE_BYTES},
-   {"GPU-load", R600_QUERY_GPU_LOAD, {100}},
-   {"temperature", R600_QUERY_GPU_TEMPERATURE, {125}},
-   {"shader-clock", R600_QUERY_CURRENT_GPU_SCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   {"memory-clock", R600_QUERY_CURRENT_GPU_MCLK, {0}, 
PIPE_DRIVER_QUERY_TYPE_HZ},
-   };
-   unsigned num_queries;
-
-   if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 42)
-   num_queries = Elements(list);
-   else if (rscreen->info.drm_major == 3)
-   num_queries = Elements(list) - 3;
-   else
-   num_queries = Elements(list) - 4;
-
-   if (!info)
-   return num_queries;
-
-   if (index >= num_queries)
-   return 0;
-
-   *info = list[index];
-   return 1;
-}
-
 static void r600_fence_reference(struct pipe_screen *screen,
 struct pipe_fence_handle **dst,
 struct pipe_fence_handle *src)
@@ -968,7 +924,6 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
rscreen->b.get_device_vendor = r600_get_device_vendor;
rscreen->b.get_compute_param = r600_get_compute_param;
rscreen->b.get_paramf = r600_get_paramf;
-   rscreen->b.get_driver_query_info = r600_get_driver_query_info;
rscreen->b.get_timestamp = r600_get_timestamp;
rscreen->b.fence_finish = r600_fence_finish;
rscreen->b.fence_reference = r600_fence_reference;
@@ -984,6 +939,7 @@ bool r600_common_screen_init(struct r600_common_screen 
*rscreen,
}
 
r600_init_screen_texture_functions(rscreen);
+   r600_init_screen_query_functions(rscreen);
 
rscreen->ws = ws;
rscreen->family = rscreen->info.family;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index b7f1a23..d2c54f3 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -534,6 +534,7 @@ uint64_t r600_gpu_load_begin(struct r600_common_screen 
*rscreen);
 unsigned r600_gpu_load_end(struct r600_common_screen *rscreen, uint64_t begin);
 
 /* r600_query.c */
+void r600_init_screen_query_functions(struct r600_common_screen *rscreen);
 void r600_query_init(struct r600_common_context *rctx);
 void r600_suspend_nontimer_queries(struct r600_common_context *ctx);
 void r600_resume_nontimer_queries(struct r600_common_context *ctx);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 9a54025..8aa8774 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1017,6 +1017,50 @@ err:
return;
 }
 

[Mesa-dev] [PATCH 06/10] radeon: convert hardware queries to the new style

2015-11-13 Thread Nicolai Hähnle
Move r600_query and r600_query_hw into the header because we will want to
reuse the buffer handling and suspend/resume logic outside of the common
radeon code.
---
 src/gallium/drivers/radeon/r600_query.c | 281 +++-
 src/gallium/drivers/radeon/r600_query.h |  39 +
 2 files changed, 172 insertions(+), 148 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index c7350f1..eb2a563 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -26,35 +26,6 @@
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
-struct r600_query_buffer {
-   /* The buffer where query results are stored. */
-   struct r600_resource*buf;
-   /* Offset of the next free result after current query data */
-   unsignedresults_end;
-   /* If a query buffer is full, a new buffer is created and the old one
-* is put in here. When we calculate the result, we sum up the samples
-* from all buffers. */
-   struct r600_query_buffer*previous;
-};
-
-struct r600_query {
-   struct r600_query_ops *ops;
-
-   /* The query buffer and how many results are in it. */
-   struct r600_query_bufferbuffer;
-   /* The type of query */
-   unsignedtype;
-   /* Size of the result in memory for both begin_query and end_query,
-* this can be one or two numbers, or it could even be a size of a 
structure. */
-   unsignedresult_size;
-   /* The number of dwords for begin_query or end_query. */
-   unsignednum_cs_dw;
-   /* linked list of queries */
-   struct list_headlist;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
-};
-
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
struct r600_query b;
@@ -240,23 +211,23 @@ static struct pipe_query *r600_query_sw_create(struct 
pipe_context *ctx,
return (struct pipe_query *)query;
 }
 
-static void r600_do_destroy_query(struct r600_common_context *, struct 
r600_query *);
-static boolean r600_do_begin_query(struct r600_common_context *, struct 
r600_query *);
-static void r600_do_end_query(struct r600_common_context *, struct r600_query 
*);
-static boolean r600_do_get_query_result(struct r600_common_context *,
-   struct r600_query *, boolean wait,
-   union pipe_query_result *result);
-static void r600_do_render_condition(struct r600_common_context *,
-struct r600_query *, boolean condition,
-uint mode);
+void r600_query_hw_destroy(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_hw *query = (struct r600_query_hw *)rquery;
+   struct r600_query_buffer *prev = query->buffer.previous;
 
-static struct r600_query_ops legacy_query_ops = {
-   .destroy = r600_do_destroy_query,
-   .begin = r600_do_begin_query,
-   .end = r600_do_end_query,
-   .get_result = r600_do_get_query_result,
-   .render_condition = r600_do_render_condition,
-};
+   /* Release all query buffers. */
+   while (prev) {
+   struct r600_query_buffer *qbuf = prev;
+   prev = prev->previous;
+   pipe_resource_reference((struct pipe_resource**)&qbuf->buf, 
NULL);
+   FREE(qbuf);
+   }
+
+   pipe_resource_reference((struct pipe_resource**)&query->buffer.buf, 
NULL);
+   FREE(rquery);
+}
 
 static bool r600_is_timer_query(unsigned type)
 {
@@ -317,6 +288,77 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
return buf;
 }
 
+static boolean r600_query_hw_begin(struct r600_common_context *, struct 
r600_query *);
+static void r600_query_hw_end(struct r600_common_context *, struct r600_query 
*);
+static boolean r600_query_hw_get_result(struct r600_common_context *,
+   struct r600_query *, boolean wait,
+   union pipe_query_result *result);
+static void r600_do_render_condition(struct r600_common_context *,
+struct r600_query *, boolean condition,
+uint mode);
+
+static struct r600_query_ops query_hw_ops = {
+   .destroy = r600_query_hw_destroy,
+   .begin = r600_query_hw_begin,
+   .end = r600_query_hw_end,
+   .get_result = r600_query_hw_get_result,
+   .render_condition = r600_do_render_condition,
+};
+
+static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
+  unsigned que

[Mesa-dev] [PATCH 10/10] radeon: count cs dwords separately for query begin and end

2015-11-13 Thread Nicolai Hähnle
This will be important for performance counter queries.
---
 src/gallium/drivers/radeon/r600_query.c | 33 +++--
 src/gallium/drivers/radeon/r600_query.h |  3 ++-
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4f89634..f8a30a2 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -342,16 +342,18 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
query->result_size = 16 * rctx->max_db;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
case PIPE_QUERY_TIME_ELAPSED:
query->result_size = 16;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_begin = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER;
break;
case PIPE_QUERY_TIMESTAMP:
query->result_size = 8;
-   query->num_cs_dw = 8;
+   query->num_cs_dw_end = 8;
query->flags = R600_QUERY_HW_FLAG_TIMER |
   R600_QUERY_HW_FLAG_NO_START;
break;
@@ -361,13 +363,15 @@ static struct pipe_query *r600_query_hw_create(struct 
r600_common_context *rctx,
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
query->result_size = 32;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
query->stream = index;
break;
case PIPE_QUERY_PIPELINE_STATISTICS:
/* 11 values on EG, 8 on R600. */
query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 
16;
-   query->num_cs_dw = 6;
+   query->num_cs_dw_begin = 6;
+   query->num_cs_dw_end = 6;
break;
default:
assert(0);
@@ -465,7 +469,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
 
r600_update_occlusion_query_state(ctx, query->b.type, 1);
r600_update_prims_generated_query_state(ctx, query->b.type, 1);
-   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw * 2, TRUE);
+
+   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_begin + 
query->num_cs_dw_end,
+  TRUE);
 
/* Get a new query buffer if needed. */
if (query->buffer.results_end + query->result_size > 
query->buffer.buf->b.b.width0) {
@@ -482,10 +488,9 @@ static void r600_query_hw_emit_start(struct 
r600_common_context *ctx,
query->ops->emit_start(ctx, query, query->buffer.buf, va);
 
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend += query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
-
+   ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw_end;
 }
 
 static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
@@ -546,7 +551,7 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
/* The queries which need begin already called this in begin_query. */
if (query->flags & R600_QUERY_HW_FLAG_NO_START) {
-   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw, FALSE);
+   ctx->need_gfx_cs_space(&ctx->b, query->num_cs_dw_end, FALSE);
}
 
/* emit end query */
@@ -558,9 +563,9 @@ static void r600_query_hw_emit_stop(struct 
r600_common_context *ctx,
 
if (!(query->flags & R600_QUERY_HW_FLAG_NO_START)) {
if (query->flags & R600_QUERY_HW_FLAG_TIMER)
-   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_timer_queries_suspend -= 
query->num_cs_dw_end;
else
-   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw;
+   ctx->num_cs_dw_nontimer_queries_suspend -= 
query->num_cs_dw_end;
}
 
r600_update_occlusion_query_state(ctx, query->b.type, -1);
@@ -980,14 +985,14 @@ static unsigned 
r600_queries_num_cs_dw_for_resuming(struct r600_common_context *
 
LIST_FOR_EACH_ENTRY(query, query_list, list) {
/* begin + end */
-   num_dw += query->num_cs_dw * 2;
+   num_dw += query->num_cs_dw_begin + query->num_cs_dw_end;
 
/* Workaround for the fact that
 * num_cs_dw_nontimer_queries_suspend is incremented for every
 * resumed query, whic

[Mesa-dev] [PATCH 08/10] radeon: implement r600_query_hw_get_result via function pointers

2015-11-13 Thread Nicolai Hähnle
We will need the clear_result override for the batch query implementation.
---
 src/gallium/drivers/radeon/r600_query.c | 189 +++-
 src/gallium/drivers/radeon/r600_query.h |   4 +
 2 files changed, 94 insertions(+), 99 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 4b201fd..59e2a58 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -307,11 +307,18 @@ static void r600_query_hw_do_emit_stop(struct 
r600_common_context *ctx,
   struct r600_query_hw *query,
   struct r600_resource *buffer,
   uint64_t va);
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *, void *buffer,
+union pipe_query_result *result);
+static void r600_query_hw_clear_result(struct r600_query_hw *,
+  union pipe_query_result *);
 
 static struct r600_query_hw_ops query_hw_default_hw_ops = {
.prepare_buffer = r600_query_hw_prepare_buffer,
.emit_start = r600_query_hw_do_emit_start,
.emit_stop = r600_query_hw_do_emit_stop,
+   .clear_result = r600_query_hw_clear_result,
+   .add_result = r600_query_hw_add_result,
 };
 
 static struct pipe_query *r600_query_hw_create(struct r600_common_context 
*rctx,
@@ -695,7 +702,7 @@ static void r600_query_hw_end(struct r600_common_context 
*rctx,
LIST_DELINIT(&query->list);
 }
 
-static unsigned r600_query_read_result(char *map, unsigned start_index, 
unsigned end_index,
+static unsigned r600_query_read_result(void *map, unsigned start_index, 
unsigned end_index,
   bool test_status_bit)
 {
uint32_t *current_result = (uint32_t*)map;
@@ -713,47 +720,36 @@ static unsigned r600_query_read_result(char *map, 
unsigned start_index, unsigned
return 0;
 }
 
-static boolean r600_get_query_buffer_result(struct r600_common_context *ctx,
-   struct r600_query_hw *query,
-   struct r600_query_buffer *qbuf,
-   boolean wait,
-   union pipe_query_result *result)
+static void r600_query_hw_add_result(struct r600_common_context *ctx,
+struct r600_query_hw *query,
+void *buffer,
+union pipe_query_result *result)
 {
-   unsigned results_base = 0;
-   char *map;
-
-   map = r600_buffer_map_sync_with_rings(ctx, qbuf->buf,
-   PIPE_TRANSFER_READ |
-   (wait ? 0 : 
PIPE_TRANSFER_DONTBLOCK));
-   if (!map)
-   return FALSE;
-
-   /* count all results across all data blocks */
switch (query->b.type) {
-   case PIPE_QUERY_OCCLUSION_COUNTER:
-   while (results_base != qbuf->results_end) {
+   case PIPE_QUERY_OCCLUSION_COUNTER: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, true);
+   r600_query_read_result(buffer + results_base, 
0, 2, true);
results_base += 16;
}
break;
-   case PIPE_QUERY_OCCLUSION_PREDICATE:
-   while (results_base != qbuf->results_end) {
+   }
+   case PIPE_QUERY_OCCLUSION_PREDICATE: {
+   unsigned results_base = 0;
+   while (results_base != query->result_size) {
result->b = result->b ||
-   r600_query_read_result(map + results_base, 0, 
2, true) != 0;
+   r600_query_read_result(buffer + results_base, 
0, 2, true) != 0;
results_base += 16;
}
break;
+   }
case PIPE_QUERY_TIME_ELAPSED:
-   while (results_base != qbuf->results_end) {
-   result->u64 +=
-   r600_query_read_result(map + results_base, 0, 
2, false);
-   results_base += query->result_size;
-   }
+   result->u64 += r600_query_read_result(buffer, 0, 2, false);
break;
case PIPE_QUERY_TIMESTAMP:
{
-   uint32_t *current_result = (uint32_t*)map;
+   uint32_t *current_result = (uint32_t*)buffer;
result->u64 = (uint64_t)current_result[0] |
  (uint64_t)current_result[1] << 32;
break;
@@ 

[Mesa-dev] [PATCH 00/10] radeon: cleanup and refactor the query implementation

2015-11-13 Thread Nicolai Hähnle
Hi,

in preparation for performance counters, this series makes the implementation
of queries pluggable, and separates query buffer handling from CS emit and
result collection for hardware queries.

Aside from two PIPE_QUERY_GPU_FINISHED-related fixes (using context flush,
picked up from Marek, and fixing a fence leak), this should not affect the
feature set in any way.

Please review!

Thanks,
Nicolai
---
 Makefile.sources   |1 
 r600_pipe_common.c |   46 --
 r600_pipe_common.h |   16 
 r600_query.c   | 1014 ++---
 r600_query.h   |  139 +++
 5 files changed, 734 insertions(+), 482 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/10] radeon: convert software queries to the new style

2015-11-13 Thread Nicolai Hähnle
Software queries are all queries that do not require suspend/resume
and explicit handling of result buffers.

Note that this fixes a fence leak with PIPE_QUERY_GPU_FINISHED, and it
contains Marek's fix to GPU_FINISHED's end_query() handling.
---
 src/gallium/drivers/radeon/r600_query.c | 366 +---
 1 file changed, 194 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index fdab8e3..c7350f1 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -51,15 +51,195 @@ struct r600_query {
unsignednum_cs_dw;
/* linked list of queries */
struct list_headlist;
-   /* for custom non-GPU queries */
+   /* For transform feedback: which stream the query is for */
+   unsigned stream;
+};
+
+/* Queries without buffer handling or suspend/resume. */
+struct r600_query_sw {
+   struct r600_query b;
+
uint64_t begin_result;
uint64_t end_result;
/* Fence for GPU_FINISHED. */
struct pipe_fence_handle *fence;
-   /* For transform feedback: which stream the query is for */
-   unsigned stream;
 };
 
+static void r600_query_sw_destroy(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct pipe_screen *screen = rctx->b.screen;
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   screen->fence_reference(screen, &query->fence, NULL);
+   FREE(query);
+}
+
+static enum radeon_value_id winsys_id_from_type(unsigned type)
+{
+   switch (type) {
+   case R600_QUERY_REQUESTED_VRAM: return RADEON_REQUESTED_VRAM_MEMORY;
+   case R600_QUERY_REQUESTED_GTT: return RADEON_REQUESTED_GTT_MEMORY;
+   case R600_QUERY_BUFFER_WAIT_TIME: return RADEON_BUFFER_WAIT_TIME_NS;
+   case R600_QUERY_NUM_CS_FLUSHES: return RADEON_NUM_CS_FLUSHES;
+   case R600_QUERY_NUM_BYTES_MOVED: return RADEON_NUM_BYTES_MOVED;
+   case R600_QUERY_VRAM_USAGE: return RADEON_VRAM_USAGE;
+   case R600_QUERY_GTT_USAGE: return RADEON_GTT_USAGE;
+   case R600_QUERY_GPU_TEMPERATURE: return RADEON_GPU_TEMPERATURE;
+   case R600_QUERY_CURRENT_GPU_SCLK: return RADEON_CURRENT_SCLK;
+   case R600_QUERY_CURRENT_GPU_MCLK: return RADEON_CURRENT_MCLK;
+   default: unreachable("query type does not correspond to winsys id");
+   }
+}
+
+static boolean r600_query_sw_begin(struct r600_common_context *rctx,
+  struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   case PIPE_QUERY_GPU_FINISHED:
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->begin_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU_MCLK:
+   query->begin_result = 0;
+   break;
+   case R600_QUERY_BUFFER_WAIT_TIME:
+   case R600_QUERY_NUM_CS_FLUSHES:
+   case R600_QUERY_NUM_BYTES_MOVED: {
+   enum radeon_value_id ws_id = winsys_id_from_type(query->b.type);
+   query->begin_result = rctx->ws->query_value(rctx->ws, ws_id);
+   break;
+   }
+   case R600_QUERY_GPU_LOAD:
+   query->begin_result = r600_gpu_load_begin(rctx->screen);
+   break;
+   case R600_QUERY_NUM_COMPILATIONS:
+   query->begin_result = 
p_atomic_read(&rctx->screen->num_compilations);
+   break;
+   case R600_QUERY_NUM_SHADERS_CREATED:
+   query->begin_result = 
p_atomic_read(&rctx->screen->num_shaders_created);
+   break;
+   default:
+   unreachable("r600_query_sw_begin: bad query type");
+   }
+
+   return TRUE;
+}
+
+static void r600_query_sw_end(struct r600_common_context *rctx,
+ struct r600_query *rquery)
+{
+   struct r600_query_sw *query = (struct r600_query_sw *)rquery;
+
+   switch(query->b.type) {
+   case PIPE_QUERY_TIMESTAMP_DISJOINT:
+   break;
+   case PIPE_QUERY_GPU_FINISHED:
+   rctx->b.flush(&rctx->b, &query->fence, 0);
+   break;
+   case R600_QUERY_DRAW_CALLS:
+   query->begin_result = rctx->num_draw_calls;
+   break;
+   case R600_QUERY_REQUESTED_VRAM:
+   case R600_QUERY_REQUESTED_GTT:
+   case R600_QUERY_VRAM_USAGE:
+   case R600_QUERY_GTT_USAGE:
+   case R600_QUERY_GPU_TEMPERATURE:
+   case R600_QUERY_CURRENT_GPU_SCLK:
+   case R600_QUERY_CURRENT_GPU

Re: [Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 00:14, Glenn Kennard wrote:

Signed-off-by: Glenn Kennard 
---
Maybe there is a better way to check if a thread is a helper invocation?


Is ctx->face_gpr guaranteed to be initialized when 
load_helper_invocation is called?


Aside, I'm not sure I understand correctly what this is supposed to do. 
The values you're querying are related to multi-sampling, but my 
understanding has always been that helper invocations can also happen 
without multi-sampling: you always want to process 2x2 quads of pixels 
at a time to be able to compute derivatives for texture sampling. When 
the boundary of a primitive intersects such a quad, you get helper 
invocations outside the primitive.


Cheers,
Nicolai


  src/gallium/drivers/r600/r600_shader.c | 83 +-
  1 file changed, 72 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index 560197c..a227d78 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
name == TGSI_SEMANTIC_PSIZE ||
name == TGSI_SEMANTIC_EDGEFLAG ||
name == TGSI_SEMANTIC_FACE ||
-   name == TGSI_SEMANTIC_SAMPLEMASK)
+   name == TGSI_SEMANTIC_SAMPLEMASK ||
+   name == TGSI_SEMANTIC_HELPER_INVOCATION)
index = 0;
else {
if (name == TGSI_SEMANTIC_GENERIC) {
@@ -734,7 +735,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SYSTEM_VALUE:
if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
-   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
+   d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
+   d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
break; /* Already handled from 
allocate_system_value_inputs */
} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
if (!ctx->native_integers) {
@@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
struct {
boolean enabled;
int *reg;
-   unsigned name, alternate_name;
+   unsigned associated_semantics[3];
} inputs[2] = {
-   { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* 
lives in Front Face GPR.z */
-
-   { false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, 
TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
+   { false, &ctx->face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives in 
Front Face GPR.z */,
+   TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
+   { false, &ctx->fixed_pt_position_gpr, { TGSI_SEMANTIC_SAMPLEID  
/* in Fixed Point Position GPR.w */,
+   TGSI_SEMANTIC_SAMPLEPOS, 
TGSI_SEMANTIC_HELPER_INVOCATION } }
};
-   int i, k, num_regs = 0;
+   int i, k, l, num_regs = 0;

if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
return 0;
@@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
struct tgsi_full_declaration *d = 
&parse.FullToken.FullDeclaration;
if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
for (k = 0; k < Elements(inputs); k++) {
-   if (d->Semantic.Name == inputs[k].name 
||
-   d->Semantic.Name == 
inputs[k].alternate_name) {
-   inputs[k].enabled = true;
+   for (l = 0; l < 3; l++) {
+   if (d->Semantic.Name == 
inputs[k].associated_semantics[l]) {
+   inputs[k].enabled = 
true;
+   break;
+   }
}
}
}
@@ -832,7 +837,7 @@ static int allocate_system_value_inputs(struct 
r600_shader_ctx *ctx, int gpr_off
for (i = 0; i < Elements(inputs); i++) {
boolean enabled = inputs[i].enabled;
int *reg = inputs[i].reg;
-   unsigned name = inputs[i].name;
+   unsigned name = inputs[i].associated_semantics[0];

if (enabled) {
int gpr = gpr_offset + num_regs++;
@@ -985,6 +990,56 @@ static int load_sample_position(struct r600_shader_ctx 
*ctx, struct r600_shader_
return t1;
  }

+static int load_helper_invocation(stru

Re: [Mesa-dev] [PATCH 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:34, Samuel Pitoiset wrote:



On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

---
  src/gallium/include/pipe/p_defines.h   | 2 ++
  src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
  2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
 /* PIPE_QUERY_PRIMITIVES_GENERATED */
 /* PIPE_QUERY_PRIMITIVES_EMITTED */
 /* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */


While you are at it, please also add /*
PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */ to pipe_query_result.


Sorry, I don't understand. Isn't that what I'm doing here?

Cheers,
Nicolai


With this minor change, this patch is:

Reviewed-by: Samuel Pitoiset 



 /* PIPE_DRIVER_QUERY_TYPE_HZ */
 uint64_t u64;

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
   c->Name = info.name;
   switch (info.type) {
  case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
 c->Minimum.u64 = 0;
 c->Maximum.u64 = info.max_value.u64 ?
info.max_value.u64 : -1;
 c->Type = GL_UNSIGNED_INT64_AMD;





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 18:35, Samuel Pitoiset wrote:

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

This was only used to implement an unnecessarily restrictive
interpretation
of the spec of AMD_performance_monitor. The spec says

   A performance monitor consists of a number of hardware and software
   counters that can be sampled by the GPU and reported back to the
   application.

I guess one could take this as a requirement that counters _must_ be
sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.


The spec says:

"
While BeginPerfMonitorAMD does mark the beginning of performance counter
collection, the counters do not begin collecting immediately.  Rather,
the counters begin collection when BeginPerfMonitorAMD is processed by
the hardware.  That is, the API is asynchronous, and performance counter
collection does not begin until the graphics hardware processes the
BeginPerfMonitorAMD command.
"


Right. I interpreted this as the authors' attempt to say that the 
counting happens in what other parts of OpenGL traditionally call "the 
server", i.e. the Begin/EndPerfMonitorAMD commands can be used to 
bracket draw calls in the way you'd usually expect, in the same way that 
e.g. changing the DepthFunc only affects rendering once the graphics 
hardware "processes the DepthFunc command".




This is why I introduced the notion of a group of GPU counters in Gallium,
because "processed by the hardware", "asynchronous" and "command" seem
like the spec is talking about GPU only.

In which world are software counters sampled by the GPU? :-)
This spec is definitely not clear about that...

Anyway, I disagree with this patch because:
1) we need to agree on what amd_performance_monitor must or must not
expose. Maybe it's time to ask the guys who wrote it?


Well, Catalyst exposes only hardware counters in 
AMD_performance_monitor. But that's beside the point.


The real point is that the driver_query_group stuff is *only* used for 
AMD_performance_monitor. So it makes no sense that a driver would ever 
expose a driver_query_group that was not intended to be exposed via that 
extension.


I understand that the group_type was added with good intentions. I might 
have done the same. But in over a year (judging by the commit dates), no 
other use case for driver_query_groups has come up.


So really, this is a question for everybody who cares about nouveau, 
because nouveau is the only driver that (if a #define is enabled) 
advertises a CPU driver_query_group.


Do you want that group to be accessible via AMD_performance_monitor? 
Then be happy with this patch. Do you not want that group to be so 
accessible? Then just remove it, because it serves no purpose either way.




2) this doesn't really simplify code.


The patch only removes LOCs, so I find that a weird argument ;)

Cheers,
Nicolai




---
  src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
  src/gallium/include/pipe/p_defines.h  |  7 ---
  src/mesa/state_tracker/st_cb_perfmon.c| 30
---
  3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
 if (id == NVC0_HW_SM_QUERY_GROUP) {
if (screen->compute) {
   info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;

   /* Because we can't expose the number of hardware counters
needed for
* each different query, we don't want to allow more than
one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
if (screen->compute) {
   if (screen->base.class_3d < NVE4_3D_CLASS) {
  info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
  info->max_active_queries = 1;
  info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
  return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct
pipe_screen *pscreen,
  #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
 else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
return 1;
diff --git a/src/gallium/include/pipe/p_defines.h
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/s

[Mesa-dev] [PATCH] st/mesa: maintain active perfmon counters in an array (v2)

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
Thanks Samuel, good catch! I did test with piglit and the tests passed, so
probably CALLOC returned non-null with a zero size, but it's better not to
rely on that.

Cheers,
Nicolai
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+&stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(&cntr->list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -14

Re: [Mesa-dev] [PATCH 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle

Hi Samuel,

thanks for taking a look!

On 13.11.2015 18:35, Samuel Pitoiset wrote:

Did you run amd_performance_monitor piglit tests to make sure all of
your changes didn't break anything?


Yes, everything passes here.



Did you test on the nvc0 driver, which is the only driver that currently
exposes GL_AMD_performance_monitor? In case you didn't, I'll test it
myself in the next few days. You might not have the hardware. :-)


Sorry, I don't have the hardware.

Thanks,
Nicolai



Thanks.

On 11/13/2015 04:57 PM, Nicolai Hähnle wrote:

Hi,

the main point of this patch series is to introduce batch query objects.

For AMD_performance_monitor, hardware may not be able to start and stop
performance counters independently of each other. The current query
interface
does not fit such hardware well.

With this series, drivers can mark driver-specific queries with the
PIPE_DRIVER_QUERY_FLAG_BATCH flag, which indicates that those queries
require
the use of batch query objects. Batch query objects are created with an
immutable list of queries, which requires a new entry point in
pipe_context,
but apart from that they use the same begin_query/end_query/etc. entry
points.

The radeon-specific part that actually makes use of this feature is
not quite
ready yet, but I already wanted to get this part out there for feedback.
Please review!

Thanks,
Nicolai
---
  gallium/auxiliary/hud/hud_context.c   |   24 ++
  gallium/auxiliary/hud/hud_driver_query.c  |  249
+-
  gallium/auxiliary/hud/hud_private.h   |   13 +
  gallium/drivers/nouveau/nvc0/nvc0_query.c |4
  gallium/include/pipe/p_context.h  |3
  gallium/include/pipe/p_defines.h  |   36 ++--
  mesa/state_tracker/st_cb_perfmon.c|  247
-
  mesa/state_tracker/st_cb_perfmon.h|   32 +++
  mesa/state_tracker/st_context.h   |3
  9 files changed, 437 insertions(+), 174 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle

On 13.11.2015 19:27, Ilia Mirkin wrote:

On Fri, Nov 13, 2015 at 1:23 PM, Nicolai Hähnle  wrote:

So really, this is a question for everybody who cares about nouveau, because
nouveau is the only driver that (if a #define is enabled) advertises a CPU
driver_query_group.

Do you want that group to be accessible via AMD_performance_monitor? Then be
happy with this patch. Do you not want that group to be so accessible? Then
just remove it, because it serves no purpose either way.


There's also the HUD, and Samuel's WIP NVIDIA PerfKit-style library impl.


The HUD doesn't care about groups. If Samuel really cares about this for 
his library (which I haven't seen - where is it?), I can drop this patch.


Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/9] st/mesa: store mapping from perfmon counter to query type

2015-11-13 Thread Nicolai Hähnle
Previously, when a performance monitor was initialized, an inner loop through
all driver queries with string comparisons for each enabled performance
monitor counter was used. This hurts when a driver exposes lots of queries.

Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 74 +++---
 src/mesa/state_tracker/st_cb_perfmon.h | 14 +++
 src/mesa/state_tracker/st_context.h|  3 ++
 3 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index dedb8f5..80ff170 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -36,48 +36,20 @@
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 
-/**
- * Return a PIPE_QUERY_x type >= PIPE_QUERY_DRIVER_SPECIFIC, or -1 if
- * the driver-specific query doesn't exist.
- */
-static int
-find_query_type(struct pipe_screen *screen, const char *name)
-{
-   int num_queries;
-   int type = -1;
-   int i;
-
-   num_queries = screen->get_driver_query_info(screen, 0, NULL);
-   if (!num_queries)
-  return type;
-
-   for (i = 0; i < num_queries; i++) {
-  struct pipe_driver_query_info info;
-
-  if (!screen->get_driver_query_info(screen, i, &info))
- continue;
-
-  if (!strncmp(info.name, name, strlen(name))) {
- type = info.query_type;
- break;
-  }
-   }
-   return type;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
+   struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
-   struct pipe_screen *screen = st_context(ctx)->pipe->screen;
-   struct pipe_context *pipe = st_context(ctx)->pipe;
+   struct pipe_context *pipe = st->pipe;
int gid, cid;
 
-   st_flush_bitmap_cache(st_context(ctx));
+   st_flush_bitmap_cache(st);
 
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -90,20 +62,17 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 
   for (cid = 0; cid < g->NumCounters; cid++) {
  const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+ const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
- int query_type;
 
  if (!BITSET_TEST(m->ActiveCounters[gid], cid))
 continue;
 
- query_type = find_query_type(screen, c->Name);
- assert(query_type != -1);
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
 
- cntr->query= pipe->create_query(pipe, query_type, 0);
+ cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
 
@@ -286,6 +255,7 @@ st_init_perfmon(struct st_context *st)
struct gl_perf_monitor_state *perfmon = &st->ctx->PerfMonitor;
struct pipe_screen *screen = st->pipe->screen;
struct gl_perf_monitor_group *groups = NULL;
+   struct st_perf_monitor_group *stgroups = NULL;
int num_counters, num_groups;
int gid, cid;
 
@@ -304,26 +274,36 @@ st_init_perfmon(struct st_context *st)
if (!groups)
   return false;
 
+   stgroups = CALLOC(num_groups, sizeof(*stgroups));
+   if (!stgroups)
+  goto fail_only_groups;
+
for (gid = 0; gid < num_groups; gid++) {
   struct gl_perf_monitor_group *g = &groups[perfmon->NumGroups];
   struct pipe_driver_query_group_info group_info;
   struct gl_perf_monitor_counter *counters = NULL;
+  struct st_perf_monitor_counter *stcounters = NULL;
 
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
-  g->NumCounters = 0;
-  g->Counters = NULL;
 
   if (group_info.num_queries)
  counters = CALLOC(group_info.num_queries, sizeof(*counters));
   if (!counters)
  goto fail;
+  g->Counters = counters;
+
+  stcounters = CALLOC(group_info.num_queries, sizeof(*stcounters));
+  if (!stcounters)
+ goto fail;
+  stgroups[perfmon->NumGroups].counters = stcounters;
 
   for (cid = 0; cid < num_counters; cid++) {
  struct gl_perf_monitor_counter *c = &counters[g->NumCounters];
+ struct st_perf_monitor_counter *stc = &stcounters[g->NumCounters];
  struct pipe_driver_query_info info;
 
  if (!screen->get_driver_query_info(screen, cid, &info))
@@ -359,18 +339,25 @@ st_init_perfmon(struct st_context *st)
  

[Mesa-dev] [PATCH v2 7/9] gallium: add the concept of batch queries

2015-11-13 Thread Nicolai Hähnle
Some drivers (in particular radeon[si], but also freedreno judging from
a quick grep) may want to expose performance counters that cannot be
individually enabled or disabled.

Allow such drivers to mark driver-specific queries as requiring a new
type of batch query object that is used to start and stop a list of queries
simultaneously.

v2: documentation for create_batch_query
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  1 +
 src/gallium/include/pipe/p_context.h  | 19 +++
 src/gallium/include/pipe/p_defines.h  | 27 +--
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index a1d6162..0608337 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -162,6 +162,7 @@ nvc0_screen_get_driver_query_info(struct pipe_screen 
*pscreen,
info->max_value.u64 = 0;
info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;
info->group_id = -1;
+   info->flags = 0;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
if (id < num_sw_queries)
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 27f358f..be7447d 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -116,6 +116,25 @@ struct pipe_context {
unsigned query_type,
unsigned index );
 
+   /**
+* Create a query object that queries all given query types simultaneously.
+*
+* This can only be used for those query types for which
+* get_driver_query_info indicates that it must be used. Only one batch
+* query object may be active at a time.
+*
+* There may be additional constraints on which query types can be used
+* together, in particular those that are implied by
+* get_driver_query_group_info.
+*
+* \param num_queries the number of query types
+* \param query_types array of \p num_queries query types
+* \return a query object, or NULL on error.
+*/
+   struct pipe_query *(*create_batch_query)( struct pipe_context *pipe,
+ unsigned num_queries,
+ unsigned *query_types );
+
void (*destroy_query)(struct pipe_context *pipe,
  struct pipe_query *q);
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7ed9f6d..b3c8b9f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -776,6 +776,16 @@ struct pipe_query_data_pipeline_statistics
 };
 
 /**
+ * For batch queries.
+ */
+union pipe_numeric_type_union
+{
+   uint64_t u64;
+   uint32_t u32;
+   float f;
+};
+
+/**
  * Query result (returned by pipe_context::get_query_result).
  */
 union pipe_query_result
@@ -811,6 +821,9 @@ union pipe_query_result
 
/* PIPE_QUERY_PIPELINE_STATISTICS */
struct pipe_query_data_pipeline_statistics pipeline_statistics;
+
+   /* batch queries */
+   union pipe_numeric_type_union batch[0];
 };
 
 union pipe_color_union
@@ -840,12 +853,13 @@ enum pipe_driver_query_result_type
PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE = 1,
 };
 
-union pipe_numeric_type_union
-{
-   uint64_t u64;
-   uint32_t u32;
-   float f;
-};
+/**
+ * Some hardware requires some hardware-specific queries to be submitted
+ * as batched queries. The corresponding query objects are created using
+ * create_batch_query, and at most one such query may be active at
+ * any time.
+ */
+#define PIPE_DRIVER_QUERY_FLAG_BATCH (1 << 0)
 
 struct pipe_driver_query_info
 {
@@ -855,6 +869,7 @@ struct pipe_driver_query_info
enum pipe_driver_query_type type;
enum pipe_driver_query_result_type result_type;
unsigned group_id;
+   unsigned flags;
 };
 
 struct pipe_driver_query_group_info
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/9] gallium: batch query objects and related cleanups

2015-11-13 Thread Nicolai Hähnle
Hi,

I have updated patches 6 - 9. Samuel, thank you for your input and I hope
you find your points to be resolved satisfactorily ;)

Cheers,
Nicolai
---
nha@deadlights:~/amd/mesa$ git diff master | diffstat
 gallium/auxiliary/hud/hud_context.c   |   24 +-
 gallium/auxiliary/hud/hud_driver_query.c  |  266 +-
 gallium/auxiliary/hud/hud_private.h   |   13 +
 gallium/drivers/nouveau/nvc0/nvc0_query.c |4 
 gallium/include/pipe/p_context.h  |   19 ++
 gallium/include/pipe/p_defines.h  |   36 ++--
 mesa/state_tracker/st_cb_perfmon.c|  253 
 mesa/state_tracker/st_cb_perfmon.h|   32 ++-
 mesa/state_tracker/st_context.h   |3 
 9 files changed, 475 insertions(+), 175 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 5/9] st/mesa: use BITSET_FOREACH_SET to loop through active perfmon counters

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/mesa/state_tracker/st_cb_perfmon.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 80ff170..ec12eb2 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -50,6 +50,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
   const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -60,14 +61,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
-  for (cid = 0; cid < g->NumCounters; cid++) {
- const struct gl_perf_monitor_counter *c = &g->Counters[cid];
+  BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
  struct st_perf_counter_object *cntr;
 
- if (!BITSET_TEST(m->ActiveCounters[gid], cid))
-continue;
-
  cntr = CALLOC_STRUCT(st_perf_counter_object);
  if (!cntr)
 return false;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/9] st/mesa: map semantic driver query types to underlying type

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/include/pipe/p_defines.h   | 2 ++
 src/mesa/state_tracker/st_cb_perfmon.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7f241c8..7ed9f6d 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -791,6 +791,8 @@ union pipe_query_result
/* PIPE_QUERY_PRIMITIVES_GENERATED */
/* PIPE_QUERY_PRIMITIVES_EMITTED */
/* PIPE_DRIVER_QUERY_TYPE_UINT64 */
+   /* PIPE_DRIVER_QUERY_TYPE_BYTES */
+   /* PIPE_DRIVER_QUERY_TYPE_MICROSECONDS */
/* PIPE_DRIVER_QUERY_TYPE_HZ */
uint64_t u64;
 
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 4ec6d86..dedb8f5 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -334,6 +334,9 @@ st_init_perfmon(struct st_context *st)
  c->Name = info.name;
  switch (info.type) {
 case PIPE_DRIVER_QUERY_TYPE_UINT64:
+case PIPE_DRIVER_QUERY_TYPE_BYTES:
+case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+case PIPE_DRIVER_QUERY_TYPE_HZ:
c->Minimum.u64 = 0;
c->Maximum.u64 = info.max_value.u64 ? info.max_value.u64 : -1;
c->Type = GL_UNSIGNED_INT64_AMD;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/9] gallium: remove pipe_driver_query_group_info field type

2015-11-13 Thread Nicolai Hähnle
This was only used to implement an unnecessarily restrictive interpretation
of the spec of AMD_performance_monitor. The spec says

  A performance monitor consists of a number of hardware and software
  counters that can be sampled by the GPU and reported back to the
  application.

I guess one could take this as a requirement that counters _must_ be sampled
by the GPU, but then why are they called _software_ counters? Besides,
there's not much reason _not_ to expose all counters that are available,
and this simplifies the code.
---
 src/gallium/drivers/nouveau/nvc0/nvc0_query.c |  3 ---
 src/gallium/include/pipe/p_defines.h  |  7 ---
 src/mesa/state_tracker/st_cb_perfmon.c| 30 ---
 3 files changed, 40 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index f539210..a1d6162 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -200,7 +200,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
if (id == NVC0_HW_SM_QUERY_GROUP) {
   if (screen->compute) {
  info->name = "MP counters";
- info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 
  /* Because we can't expose the number of hardware counters needed for
   * each different query, we don't want to allow more than one active
@@ -224,7 +223,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
   if (screen->compute) {
  if (screen->base.class_3d < NVE4_3D_CLASS) {
 info->name = "Performance metrics";
-info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_GPU;
 info->max_active_queries = 1;
 info->num_queries = NVC0_HW_METRIC_QUERY_COUNT;
 return 1;
@@ -234,7 +232,6 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen 
*pscreen,
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
else if (id == NVC0_SW_QUERY_DRV_STAT_GROUP) {
   info->name = "Driver statistics";
-  info->type = PIPE_DRIVER_QUERY_GROUP_TYPE_CPU;
   info->max_active_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   info->num_queries = NVC0_SW_QUERY_DRV_STAT_COUNT;
   return 1;
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 7240154..7f241c8 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -829,12 +829,6 @@ enum pipe_driver_query_type
PIPE_DRIVER_QUERY_TYPE_HZ   = 6,
 };
 
-enum pipe_driver_query_group_type
-{
-   PIPE_DRIVER_QUERY_GROUP_TYPE_CPU = 0,
-   PIPE_DRIVER_QUERY_GROUP_TYPE_GPU = 1,
-};
-
 /* Whether an average value per frame or a cumulative value should be
  * displayed.
  */
@@ -864,7 +858,6 @@ struct pipe_driver_query_info
 struct pipe_driver_query_group_info
 {
const char *name;
-   enum pipe_driver_query_group_type type;
unsigned max_active_queries;
unsigned num_queries;
 };
diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 1bb5be3..4ec6d86 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -65,27 +65,6 @@ find_query_type(struct pipe_screen *screen, const char *name)
return type;
 }
 
-/**
- * Return TRUE if the underlying driver expose GPU counters.
- */
-static bool
-has_gpu_counters(struct pipe_screen *screen)
-{
-   int num_groups, gid;
-
-   num_groups = screen->get_driver_query_group_info(screen, 0, NULL);
-   for (gid = 0; gid < num_groups; gid++) {
-  struct pipe_driver_query_group_info group_info;
-
-  if (!screen->get_driver_query_group_info(screen, gid, &group_info))
- continue;
-
-  if (group_info.type == PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- return true;
-   }
-   return false;
-}
-
 static bool
 init_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
 {
@@ -313,12 +292,6 @@ st_init_perfmon(struct st_context *st)
if (!screen->get_driver_query_info || !screen->get_driver_query_group_info)
   return false;
 
-   if (!has_gpu_counters(screen)) {
-  /* According to the spec, GL_AMD_performance_monitor must only
-   * expose GPU counters. */
-  return false;
-   }
-
/* Get the number of available queries. */
num_counters = screen->get_driver_query_info(screen, 0, NULL);
if (!num_counters)
@@ -339,9 +312,6 @@ st_init_perfmon(struct st_context *st)
   if (!screen->get_driver_query_group_info(screen, gid, &group_info))
  continue;
 
-  if (group_info.type != PIPE_DRIVER_QUERY_GROUP_TYPE_GPU)
- continue;
-
   g->Name = group_info.name;
   g->MaxActiveCounters = group_info.max_active_queries;
   g->NumCounters = 0;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 9/9] st/mesa: add support for batch driver queries to perfmon

2015-11-13 Thread Nicolai Hähnle
v2: forgot a null-pointer check (spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 78 +++---
 src/mesa/state_tracker/st_cb_perfmon.h |  6 +++
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index 8628e23..39c3902 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,7 +42,10 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned *batch = NULL;
unsigned num_active_counters = 0;
+   unsigned max_batch_counters = 0;
+   unsigned num_batch_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
@@ -50,6 +53,7 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,6 +65,8 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
   }
 
   num_active_counters += m->ActiveGroups[gid];
+  if (stg->has_batch)
+ max_batch_counters += m->ActiveGroups[gid];
}
 
if (!num_active_counters)
@@ -71,6 +77,12 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
if (!stm->active_counters)
   return false;
 
+   if (max_batch_counters) {
+  batch = CALLOC(max_batch_counters, sizeof(*batch));
+  if (!batch)
+ return false;
+   }
+
/* Create a query for each active counter. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
@@ -82,13 +94,35 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  struct st_perf_counter_object *cntr =
 &stm->active_counters[stm->num_active_counters];
 
- cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
+ if (stc->flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+cntr->batch_index = num_batch_counters;
+batch[num_batch_counters++] = stc->query_type;
+ } else {
+cntr->query = pipe->create_query(pipe, stc->query_type, 0);
+if (!cntr->query)
+   goto fail;
+ }
  ++stm->num_active_counters;
   }
}
+
+   /* Create the batch query. */
+   if (num_batch_counters) {
+  stm->batch_query = pipe->create_batch_query(pipe, num_batch_counters,
+  batch);
+  stm->batch_result = CALLOC(num_batch_counters, 
sizeof(stm->batch_result->batch[0]));
+  if (!stm->batch_query || !stm->batch_result)
+ goto fail;
+   }
+
+   FREE(batch);
return true;
+
+fail:
+   FREE(batch);
+   return false;
 }
 
 static void
@@ -105,6 +139,13 @@ reset_perf_monitor(struct st_perf_monitor_object *stm,
FREE(stm->active_counters);
stm->active_counters = NULL;
stm->num_active_counters = 0;
+
+   if (stm->batch_query) {
+  pipe->destroy_query(pipe, stm->batch_query);
+  stm->batch_query = NULL;
+   }
+   FREE(stm->batch_result);
+   stm->batch_result = NULL;
 }
 
 static struct gl_perf_monitor_object *
@@ -143,9 +184,13 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Start the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  if (!pipe->begin_query(pipe, query))
+  if (query && !pipe->begin_query(pipe, query))
   goto fail;
}
+
+   if (stm->batch_query && !pipe->begin_query(pipe, stm->batch_query))
+  goto fail;
+
return true;
 
 fail:
@@ -164,8 +209,12 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
/* Stop the query for each active counter. */
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
-  pipe->end_query(pipe, query);
+  if (query)
+ pipe->end_query(pipe, query);
}
+
+   if (stm->batch_query)
+  pipe->end_query(pipe, stm->batch_query);
 }
 
 static void
@@ -199,11 +248,16 @@ st_IsPerfMonitorResultAvailable(struct gl_context *ctx,
for (i = 0; i < stm->num_active_counters; ++i) {
   struct pipe_query *query = stm->active_counters[i].query;
   union pipe_query_result result;
-  if (!pipe->get_query_result(p

[Mesa-dev] [PATCH v2 8/9] gallium/hud: add support for batch queries

2015-11-13 Thread Nicolai Hähnle
v2: be more defensive about allocations
---
 src/gallium/auxiliary/hud/hud_context.c  |  24 ++-
 src/gallium/auxiliary/hud/hud_driver_query.c | 265 +++
 src/gallium/auxiliary/hud/hud_private.h  |  13 +-
 3 files changed, 256 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index ffe30b8..bcef701 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -57,6 +57,7 @@ struct hud_context {
struct cso_context *cso;
struct u_upload_mgr *uploader;
 
+   struct hud_batch_query_context *batch_query;
struct list_head pane_list;
 
/* states */
@@ -510,6 +511,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));
 
/* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
   LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
  gr->query_new_value(gr);
@@ -903,17 +906,21 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
   }
   else if (strcmp(name, "samples-passed") == 0 &&
has_occlusion_query(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"samples-passed",
 PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else if (strcmp(name, "primitives-generated") == 0 &&
has_streamout(hud->pipe->screen)) {
- hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+ hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+"primitives-generated",
 PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
 PIPE_DRIVER_QUERY_TYPE_UINT64,
-PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+0);
   }
   else {
  boolean processed = FALSE;
@@ -938,17 +945,19 @@ hud_parse_env_var(struct hud_context *hud, const char 
*env)
if (strcmp(name, pipeline_statistics_names[i]) == 0)
   break;
 if (i < Elements(pipeline_statistics_names)) {
-   hud_pipe_query_install(pane, hud->pipe, name,
+   hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
   PIPE_QUERY_PIPELINE_STATISTICS, i,
   0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+  PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+  0);
processed = TRUE;
 }
  }
 
  /* driver queries */
  if (!processed) {
-if (!hud_driver_query_install(pane, hud->pipe, name)){
+if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+  name)) {
fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", 
name);
 }
  }
@@ -1287,6 +1296,7 @@ hud_destroy(struct hud_context *hud)
   FREE(pane);
}
 
+   hud_batch_query_cleanup(&hud->batch_query);
pipe->delete_fs_state(pipe, hud->fs_color);
pipe->delete_fs_state(pipe, hud->fs_text);
pipe->delete_vs_state(pipe, hud->vs);
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index 3198ab3..29f70fc 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,159 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include 
 
+// Must be a power of two
 #define NUM_QUERIES 8
 
+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;
+   unsigned allocated_query_types;
+   unsigned *query_types;
+
+   boolean failed;
+   struct pipe_query *query[NUM_QUERIES];
+   union pipe_query_result *result[NUM_QUERIES];
+   unsigned head, pending, results;
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+
+   if (!bq || bq->failed)
+  return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+  pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+
+   while (bq->pendi

[Mesa-dev] [PATCH v2 2/9] gallium/hud: remove unused field in query_info

2015-11-13 Thread Nicolai Hähnle
Reviewed-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_driver_query.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c 
b/src/gallium/auxiliary/hud/hud_driver_query.c
index f14305e..3198ab3 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -48,7 +48,6 @@ struct query_info {
/* Ring of queries. If a query is busy, we use another slot. */
struct pipe_query *query[NUM_QUERIES];
unsigned head, tail;
-   unsigned num_queries;
 
uint64_t last_time;
uint64_t results_cumulative;
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 6/9] st/mesa: maintain active perfmon counters in an array

2015-11-13 Thread Nicolai Hähnle
It is easy enough to pre-determine the required size, and arrays are
generally better behaved especially when they get large.

v2: make sure init_perf_monitor returns true when no counters are active
(spotted by Samuel Pitoiset)
---
 src/mesa/state_tracker/st_cb_perfmon.c | 81 --
 src/mesa/state_tracker/st_cb_perfmon.h | 18 
 2 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_perfmon.c 
b/src/mesa/state_tracker/st_cb_perfmon.c
index ec12eb2..8628e23 100644
--- a/src/mesa/state_tracker/st_cb_perfmon.c
+++ b/src/mesa/state_tracker/st_cb_perfmon.c
@@ -42,15 +42,14 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
struct st_context *st = st_context(ctx);
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st->pipe;
+   unsigned num_active_counters = 0;
int gid, cid;
 
st_flush_bitmap_cache(st);
 
-   /* Create a query for each active counter. */
+   /* Determine the number of active counters. */
for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
   const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
-  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
-  BITSET_WORD tmp;
 
   if (m->ActiveGroups[gid] > g->MaxActiveCounters) {
  /* Maximum number of counters reached. Cannot start the session. */
@@ -61,19 +60,32 @@ init_perf_monitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
  return false;
   }
 
+  num_active_counters += m->ActiveGroups[gid];
+   }
+
+   if (!num_active_counters)
+  return true;
+
+   stm->active_counters = CALLOC(num_active_counters,
+ sizeof(*stm->active_counters));
+   if (!stm->active_counters)
+  return false;
+
+   /* Create a query for each active counter. */
+   for (gid = 0; gid < ctx->PerfMonitor.NumGroups; gid++) {
+  const struct gl_perf_monitor_group *g = &ctx->PerfMonitor.Groups[gid];
+  const struct st_perf_monitor_group *stg = &st->perfmon[gid];
+  BITSET_WORD tmp;
+
   BITSET_FOREACH_SET(cid, tmp, m->ActiveCounters[gid], g->NumCounters) {
  const struct st_perf_monitor_counter *stc = &stg->counters[cid];
- struct st_perf_counter_object *cntr;
-
- cntr = CALLOC_STRUCT(st_perf_counter_object);
- if (!cntr)
-return false;
+ struct st_perf_counter_object *cntr =
+&stm->active_counters[stm->num_active_counters];
 
  cntr->query= pipe->create_query(pipe, stc->query_type, 0);
  cntr->id   = cid;
  cntr->group_id = gid;
-
- list_addtail(&cntr->list, &stm->active_counters);
+ ++stm->num_active_counters;
   }
}
return true;
@@ -83,24 +95,24 @@ static void
 reset_perf_monitor(struct st_perf_monitor_object *stm,
struct pipe_context *pipe)
 {
-   struct st_perf_counter_object *cntr, *tmp;
+   unsigned i;
 
-   LIST_FOR_EACH_ENTRY_SAFE(cntr, tmp, &stm->active_counters, list) {
-  if (cntr->query)
- pipe->destroy_query(pipe, cntr->query);
-  list_del(&cntr->list);
-  free(cntr);
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (query)
+ pipe->destroy_query(pipe, query);
}
+   FREE(stm->active_counters);
+   stm->active_counters = NULL;
+   stm->num_active_counters = 0;
 }
 
 static struct gl_perf_monitor_object *
 st_NewPerfMonitor(struct gl_context *ctx)
 {
struct st_perf_monitor_object *stq = 
ST_CALLOC_STRUCT(st_perf_monitor_object);
-   if (stq) {
-  list_inithead(&stq->active_counters);
+   if (stq)
   return &stq->base;
-   }
return NULL;
 }
 
@@ -119,9 +131,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_context *pipe = st_context(ctx)->pipe;
-   struct st_perf_counter_object *cntr;
+   unsigned i;
 
-   if (LIST_IS_EMPTY(&stm->active_counters)) {
+   if (!stm->num_active_counters) {
   /* Create a query for each active counter before starting
* a new monitoring session. */
   if (!init_perf_monitor(ctx, m))
@@ -129,8 +141,9 @@ st_BeginPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
}
 
/* Start the query for each active counter. */
-   LIST_FOR_EACH_ENTRY(cntr, &stm->active_counters, list) {
-  if (!pipe->begin_query(pipe, cntr->query))
+   for (i = 0; i < stm->num_active_counters; ++i) {
+  struct pipe_query *query = stm->active_counters[i].query;
+  if (!pipe->begin_query(pipe, query))
   goto fail;
}
return true;
@@ -146,11 +159,13 @@ st_EndPerfMonitor(struct gl_context *ctx, struct 
gl_perf_monitor_object *m)
 {
struct st_perf_monitor_object *stm = st_perf_monitor_object(m);
struct pipe_conte

Re: [Mesa-dev] [PATCH v2 1/2] mesa: make glFramebuffer* check immutable texture level bounds

2017-01-30 Thread Nicolai Hähnle

On 26.01.2017 06:47, Ilia Mirkin wrote:

When a texture is immutable, we can't tack on extra levels
after-the-fact like we could with glTexImage. So check against that
level limit and return an error if it's surpassed.

The spec is a little unclear in that it says to check if "level is not a
supported texture level", however that is never fully defined.

This fixes:
GL45-CTS.geometry_shader.layered_fbo.fb_texture_invalid_level_number

Signed-off-by: Ilia Mirkin 
---

v1 -> v2: use NumLevels instead of _MaxLevel.

Not sure why this isn't showing up as failing in the Intel CI, but it was
definitely failing here.


Maybe the Intel CI is running the GLCTS based on the last 4.5 release, 
and I guess you're running off what's been published on GitHub? The 
GLCTS on GitHub has a bunch of new and possibly broken tests, and may 
still have a number of regressions as well (since a lot of code was 
moved around).


Can you point out which specific place in the spec you're talking about 
in your comment?


Thanks,
Nicolai



 src/mesa/main/fbobject.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 6934805..6e86248 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3128,16 +3128,22 @@ check_layer(struct gl_context *ctx, GLenum target, 
GLint layer,
  * \return true if no errors, false if errors
  */
 static bool
-check_level(struct gl_context *ctx, GLenum target, GLint level,
-const char *caller)
+check_level(struct gl_context *ctx, const struct gl_texture_object *texObj,
+GLint level, const char *caller)
 {
if ((level < 0) ||
-   (level >= _mesa_max_texture_levels(ctx, target))) {
+   (level >= _mesa_max_texture_levels(ctx, texObj->Target))) {
   _mesa_error(ctx, GL_INVALID_VALUE,
   "%s(invalid level %d)", caller, level);
   return false;
}

+   if (texObj->Immutable && level >= texObj->NumLevels) {
+  _mesa_error(ctx, GL_INVALID_VALUE,
+  "%s(level %u >= %u)", caller, level, texObj->NumLevels);
+  return false;
+   }
+
return true;
 }

@@ -3260,7 +3266,7 @@ framebuffer_texture_with_dims(int dims, GLenum target,
   if ((dims == 3) && !check_layer(ctx, texObj->Target, layer, caller))
  return;

-  if (!check_level(ctx, textarget, level, caller))
+  if (!check_level(ctx, texObj, level, caller))
  return;
}

@@ -3328,7 +3334,7 @@ _mesa_FramebufferTextureLayer(GLenum target, GLenum 
attachment,
   if (!check_layer(ctx, texObj->Target, layer, func))
  return;

-  if (!check_level(ctx, texObj->Target, level, func))
+  if (!check_level(ctx, texObj, level, func))
  return;

   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
@@ -3370,7 +3376,7 @@ _mesa_NamedFramebufferTextureLayer(GLuint framebuffer, 
GLenum attachment,
   if (!check_layer(ctx, texObj->Target, layer, func))
  return;

-  if (!check_level(ctx, texObj->Target, level, func))
+  if (!check_level(ctx, texObj, level, func))
  return;

   if (texObj->Target == GL_TEXTURE_CUBE_MAP) {
@@ -3419,7 +3425,7 @@ _mesa_FramebufferTexture(GLenum target, GLenum attachment,
   if (!check_layered_texture_target(ctx, texObj->Target, func, &layered))
  return;

-  if (!check_level(ctx, texObj->Target, level, func))
+  if (!check_level(ctx, texObj, level, func))
  return;
}

@@ -3459,7 +3465,7 @@ _mesa_NamedFramebufferTexture(GLuint framebuffer, GLenum 
attachment,
 &layered))
  return;

-  if (!check_level(ctx, texObj->Target, level, func))
+  if (!check_level(ctx, texObj, level, func))
  return;
}




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 2/2] mesa: use same is_color_attachment trick to discern error cases

2017-01-30 Thread Nicolai Hähnle

On 26.01.2017 06:47, Ilia Mirkin wrote:

All the other calls to retrieve the attachment have been covered except
this one - return the proper error for attachment points that are valid
enums but out of bounds for the driver.

Fixes GL45-CTS.geometry_shader.layered_fbo.fb_texture_invalid_attachment

Signed-off-by: Ilia Mirkin 


Reviewed-by: Nicolai Hähnle 


---

v1 -> v2: no changes. also no change in Intel CI.

 src/mesa/main/fbobject.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 6e86248..04d1daf 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3156,6 +3156,7 @@ _mesa_framebuffer_texture(struct gl_context *ctx, struct 
gl_framebuffer *fb,
   const char *caller)
 {
struct gl_renderbuffer_attachment *att;
+   bool is_color_attachment;

/* The window-system framebuffer object is immutable */
if (_mesa_is_winsys_fbo(fb)) {
@@ -3165,10 +3166,17 @@ _mesa_framebuffer_texture(struct gl_context *ctx, 
struct gl_framebuffer *fb,
}

/* Not a hash lookup, so we can afford to get the attachment here. */
-   att = get_attachment(ctx, fb, attachment, NULL);
+   att = get_attachment(ctx, fb, attachment, &is_color_attachment);
if (att == NULL) {
-  _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", caller,
-  _mesa_enum_to_string(attachment));
+  if (is_color_attachment) {
+ _mesa_error(ctx, GL_INVALID_OPERATION,
+ "%s(invalid color attachment %s)", caller,
+ _mesa_enum_to_string(attachment));
+  } else {
+ _mesa_error(ctx, GL_INVALID_ENUM,
+ "%s(invalid attachment %s)", caller,
+ _mesa_enum_to_string(attachment));
+  }
   return;
}




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/17] radeonsi: atomize the scratch buffer state

2017-01-30 Thread Nicolai Hähnle

On 26.01.2017 17:04, Marek Olšák wrote:

From: Marek Olšák 

The update frequency is very low.

Difference: Only account for the size when allocating a new one.


Why not in the atom emit? With this patch, the size is only accounted 
for in the first CS.


Nicolai


---
 src/gallium/drivers/radeonsi/si_cp_dma.c|  3 ++-
 src/gallium/drivers/radeonsi/si_hw_context.c|  2 +-
 src/gallium/drivers/radeonsi/si_pipe.h  |  2 +-
 src/gallium/drivers/radeonsi/si_state.h |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c| 24 
 src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++--
 6 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b398256..e198765 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -276,21 +276,22 @@ static void si_cp_dma_realign_engine(struct si_context 
*sctx, unsigned size,
 * idle at this point.
 */
if (!sctx->scratch_buffer ||
sctx->scratch_buffer->b.b.width0 < scratch_size) {
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
pipe_buffer_create(&sctx->screen->b.b, 0,
   PIPE_USAGE_DEFAULT, scratch_size);
if (!sctx->scratch_buffer)
return;
-   sctx->emit_scratch_reloc = true;
+
+   si_mark_atom_dirty(sctx, &sctx->scratch_state);
}

si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
  &sctx->scratch_buffer->b.b, size, size, user_flags,
  is_first, &dma_flags);

va = sctx->scratch_buffer->gpu_address;
si_emit_cp_dma(sctx, va, va + CP_DMA_ALIGNMENT, size, dma_flags,
   R600_COHERENCY_SHADER);
 }
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index e5da730..dabbc8e 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -218,20 +218,21 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->clip_state.atom);
ctx->msaa_sample_locs.nr_samples = 0;
si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom);
si_mark_atom_dirty(ctx, &ctx->msaa_config);
si_mark_atom_dirty(ctx, &ctx->sample_mask.atom);
si_mark_atom_dirty(ctx, &ctx->cb_render_state);
si_mark_atom_dirty(ctx, &ctx->blend_color.atom);
si_mark_atom_dirty(ctx, &ctx->db_render_state);
si_mark_atom_dirty(ctx, &ctx->stencil_ref.atom);
si_mark_atom_dirty(ctx, &ctx->spi_map);
+   si_mark_atom_dirty(ctx, &ctx->scratch_state);
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);

ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);

@@ -244,18 +245,17 @@ void si_begin_new_cs(struct si_context *ctx)
 * the first draw call. */
si_invalidate_draw_sh_constants(ctx);
ctx->last_index_size = -1;
ctx->last_primitive_restart_en = -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_gs_out_prim = -1;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
-   ctx->emit_scratch_reloc = true;
ctx->last_ls = NULL;
ctx->last_tcs = NULL;
ctx->last_tes_sh_base = -1;
ctx->last_num_tcs_input_cp = -1;

ctx->cs_shader_state.initialized = false;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 724d89e..d17d55a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -345,22 +345,22 @@ struct si_context {
int last_restart_index;
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsignedlast_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, 
GS */
boolgs_tri_strip_adj_fix;

/* Scratch buffer */
+   struct r600_atomscratch_state;
struct r600_resource*scratch_buffer;
-   boolemit_scratch_reloc;
 

Re: [Mesa-dev] [PATCH 17/17] radeonsi: fold info->indirect conditionals into the last one in draw_vbo

2017-01-30 Thread Nicolai Hähnle

I have one comment/question on patch #16. Apart from that, the series is

Reviewed-by: Nicolai Hähnle 

On 26.01.2017 17:04, Marek Olšák wrote:

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_state_draw.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c 
b/src/gallium/drivers/radeonsi/si_state_draw.c
index e4a6b7f..1dd6dcc 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1089,34 +1089,35 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
}
}

/* VI reads index buffers through TC L2. */
if (info->indexed && sctx->b.chip_class <= CIK &&
r600_resource(ib.buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
}

-   if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
-   r600_resource(info->indirect)->TC_L2_dirty = false;
-   }
+   if (info->indirect) {
+   /* Add the buffer size for memory checking in need_cs_space. */
+   r600_context_add_resource_size(ctx, info->indirect);

-   if (info->indirect_params &&
-   r600_resource(info->indirect_params)->TC_L2_dirty) {
-   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
-   r600_resource(info->indirect_params)->TC_L2_dirty = false;
-   }
+   if (r600_resource(info->indirect)->TC_L2_dirty) {
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+   r600_resource(info->indirect)->TC_L2_dirty = false;
+   }

-   /* Add buffer sizes for memory checking in need_cs_space. */
-   if (info->indirect)
-   r600_context_add_resource_size(ctx, info->indirect);
+   if (info->indirect_params &&
+   r600_resource(info->indirect_params)->TC_L2_dirty) {
+   sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+   r600_resource(info->indirect_params)->TC_L2_dirty = 
false;
+   }
+   }

si_need_cs_space(sctx);

/* Since we've called r600_context_add_resource_size for vertex buffers,
 * this must be called after si_need_cs_space, because we must let
 * need_cs_space flush before we add buffers to the buffer list.
 */
if (!si_upload_vertex_buffer_descriptors(sctx))
return;




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] st/mesa: make st_texture_get_sampler_view() static

2017-01-30 Thread Nicolai Hähnle

On 27.01.2017 14:34, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 


Reviewed-by: Nicolai Hähnle 


---
 src/mesa/state_tracker/st_sampler_view.c | 2 +-
 src/mesa/state_tracker/st_sampler_view.h | 4 
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/mesa/state_tracker/st_sampler_view.c 
b/src/mesa/state_tracker/st_sampler_view.c
index 88d5d1aae2..c78a987486 100644
--- a/src/mesa/state_tracker/st_sampler_view.c
+++ b/src/mesa/state_tracker/st_sampler_view.c
@@ -46,7 +46,7 @@
  * If none is found an empty slot is initialized with a
  * template and returned instead.
  */
-struct pipe_sampler_view **
+static struct pipe_sampler_view **
 st_texture_get_sampler_view(struct st_context *st,
 struct st_texture_object *stObj)
 {
diff --git a/src/mesa/state_tracker/st_sampler_view.h 
b/src/mesa/state_tracker/st_sampler_view.h
index d4c38bccdc..6825cebdf6 100644
--- a/src/mesa/state_tracker/st_sampler_view.h
+++ b/src/mesa/state_tracker/st_sampler_view.h
@@ -57,10 +57,6 @@ st_create_texture_sampler_view(struct pipe_context *pipe,
 }


-extern struct pipe_sampler_view **
-st_texture_get_sampler_view(struct st_context *st,
-struct st_texture_object *stObj);
-
 extern void
 st_texture_release_sampler_view(struct st_context *st,
 struct st_texture_object *stObj);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] hud: fix compilation warnings in hud_nic_graph_install()

2017-01-30 Thread Nicolai Hähnle

On 27.01.2017 14:35, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/auxiliary/hud/hud_nic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/hud/hud_nic.c 
b/src/gallium/auxiliary/hud/hud_nic.c
index f9935dea8b..7e4feb9573 100644
--- a/src/gallium/auxiliary/hud/hud_nic.c
+++ b/src/gallium/auxiliary/hud/hud_nic.c
@@ -263,11 +263,11 @@ hud_nic_graph_install(struct hud_pane *pane, const char 
*nic_name,

nic->mode = mode;
if (nic->mode == NIC_DIRECTION_RX) {
-  snprintf(gr->name, sizeof(gr->name), "%s-rx-%lldMbps", nic->name,
+  snprintf(gr->name, sizeof(gr->name), "%s-rx-%"PRIx64"Mbps", nic->name,


I think you want PRId64 - PRIx64 gives you hexadecimal.

Cheers,
Nicolai


  nic->speedMbps);
}
else if (nic->mode == NIC_DIRECTION_TX) {
-  snprintf(gr->name, sizeof(gr->name), "%s-tx-%lldMbps", nic->name,
+  snprintf(gr->name, sizeof(gr->name), "%s-tx-%"PRIx64"Mbps", nic->name,
  nic->speedMbps);
}
else if (nic->mode == NIC_RSSI_DBM)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] various: Fix missing DumpModule with recent LLVM.

2017-01-30 Thread Nicolai Hähnle

On 28.01.2017 23:08, Bas Nieuwenhuizen wrote:

Since LLVM revision 293359, LLVMDumpModule is only implemented when LLVM
is a debug build or LLVM_ENABLE_DUMP is set.

This patch adds a direct replacement for the function for radv and
radeonsi. However, as I don't know a good place to put common LLVM
code for all three, I inlined the implementation for LLVMPipe.

v2: Use the new code for LLVM 3.4+ instead of LLVM 5+ & fixed indentation

Signed-off-by: Bas Nieuwenhuizen 
---
 src/amd/common/ac_llvm_util.c| 8 
 src/amd/common/ac_llvm_util.h| 3 +++
 src/amd/common/ac_nir_to_llvm.c  | 2 +-
 src/gallium/drivers/llvmpipe/lp_jit.c| 8 +++-
 src/gallium/drivers/radeonsi/si_shader.c | 6 +++---
 5 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 770e3bd13c1..7317db76baa 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -504,3 +504,11 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,

memcpy(coords_arg, coords, sizeof(coords));
 }
+
+void
+ac_dump_module(LLVMModuleRef module)
+{
+   char *str = LLVMPrintModuleToString(module);
+   fprintf(stderr, "%s", str);
+   LLVMDisposeMessage(str);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 802c2662470..2d301c93575 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -95,6 +95,9 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
   LLVMValueRef *coords_arg,
   LLVMValueRef *derivs_arg);

+void
+ac_dump_module(LLVMModuleRef module);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 26b87e8782b..5dc74e82da0 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4566,7 +4566,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef 
tm,
   bool dump_shader)
 {
if (dump_shader)
-   LLVMDumpModule(llvm_module);
+   ac_dump_module(llvm_module);

memset(binary, 0, sizeof(*binary));
int v = ac_llvm_compile(llvm_module, binary, tm);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c 
b/src/gallium/drivers/llvmpipe/lp_jit.c
index 21260369aca..bb2b87f862f 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -222,7 +222,13 @@ lp_jit_create_types(struct lp_fragment_shader_variant *lp)
}

if (gallivm_debug & GALLIVM_DEBUG_IR) {
-  LLVMDumpModule(gallivm->module);
+#if HAVE_LLVM >= 0x304
+  char *str = LLVMPrintModuleToString(gallivm->module);
+  fprintf(stderr, "%s", str);
+  LLVMDisposeMessage(str);
+#else
+  DumpModule(gallivm->module);


Should be: LLVMDumpModule

Apart from that:

Reviewed-by: Nicolai Hähnle 


+#endif
}
 }

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5ca974e48b4..cad7bf7a024 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6400,7 +6400,7 @@ int si_compile_llvm(struct si_screen *sscreen,

if (!(sscreen->b.debug_flags & (DBG_NO_IR | DBG_PREOPT_IR))) {
fprintf(stderr, "%s LLVM IR:\n\n", name);
-   LLVMDumpModule(mod);
+   ac_dump_module(mod);
fprintf(stderr, "\n");
}
}
@@ -6599,7 +6599,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
-   LLVMDumpModule(bld_base->base.gallivm->module);
+   ac_dump_module(bld_base->base.gallivm->module);

si_llvm_finalize_module(&ctx,
r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY));
@@ -7603,7 +7603,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
/* Dump LLVM IR before any optimization passes */
if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
r600_can_dump_shader(&sscreen->b, ctx.type))
-   LLVMDumpModule(mod);
+   ac_dump_module(mod);

si_llvm_finalize_module(&ctx,
r600_extra_shader_checks(&sscreen->b, 
ctx.type));



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600/sb: Fix memory leak

2017-01-30 Thread Nicolai Hähnle

Nice find!

On 29.01.2017 19:10, Bartosz Tomczyk wrote:

---
 src/gallium/drivers/r600/sb/sb_valtable.cpp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/sb/sb_valtable.cpp 
b/src/gallium/drivers/r600/sb/sb_valtable.cpp
index a8b7b49cd4..d31a1b76d5 100644
--- a/src/gallium/drivers/r600/sb/sb_valtable.cpp
+++ b/src/gallium/drivers/r600/sb/sb_valtable.cpp
@@ -241,6 +241,7 @@ void value::remove_use(const node *n) {
{
// TODO assert((*it)->kind == kind) ?
// TODO assert((*it)->arg == arg) ?
+   delete *it;
uses.erase(it);


The delete should really be after the erase.

Cheers,
Nicolai


}
 }
@@ -290,7 +291,12 @@ bool value::is_prealloc() {
 }

 void value::delete_uses() {
-   uses.erase(uses.begin(), uses.end());
+   for (uselist::iterator it = uses.begin(); it != uses.end(); ++it)
+   {
+   delete *it;
+   }
+
+   uses.clear();
 }

 void ra_constraint::update_values() {



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] winsys/radeon: clamp vram_vis_size to 256MB

2017-01-30 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 30.01.2017 01:33, Marek Olšák wrote:

From: Marek Olšák 

the value from the kernel is wrong
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 278d4f3..a8da62f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -365,21 +365,21 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
 /* Get GEM info. */
 retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO,
 &gem_info, sizeof(gem_info));
 if (retval) {
 fprintf(stderr, "radeon: Failed to get MM info, error number %d\n",
 retval);
 return false;
 }
 ws->info.gart_size = gem_info.gart_size;
 ws->info.vram_size = gem_info.vram_size;
-ws->info.vram_vis_size = gem_info.vram_visible;
+ws->info.vram_vis_size = MIN2(gem_info.vram_visible, 256*1024*1024);

 /* Radeon allocates all buffers as contigous, which makes large allocations
  * unlikely to succeed. */
 ws->info.max_alloc_size = MAX2(ws->info.vram_size, ws->info.gart_size) * 
0.7;
 if (ws->info.drm_minor < 40)
 ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024);

 /* Get max clock frequency info and convert it to MHz */
 radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL,
  &ws->info.max_shader_clock);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600: fix a compilation warning in r600_screen_create()

2017-01-30 Thread Nicolai Hähnle

On 30.01.2017 13:55, Samuel Pitoiset wrote:

Should be r600_common_screen instead of r600_screen.

Fixes: 80157a2c20 ("gallium/radeon: clean up r600_query_init_backend_mask")
Signed-off-by: Samuel Pitoiset 


Reviewed-by: Nicolai Hähnle 


---
 src/gallium/drivers/r600/r600_pipe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 1d9111d004..e83ba32ef5 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -735,6 +735,6 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys 
*ws)
if (rscreen->b.debug_flags & DBG_TEST_DMA)
r600_test_dma(&rscreen->b);

-   r600_query_fix_enabled_rb_mask(rscreen);
+   r600_query_fix_enabled_rb_mask(&rscreen->b);
return &rscreen->b.b;
 }



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 16/17] radeonsi: atomize the scratch buffer state

2017-01-30 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 30.01.2017 12:54, Marek Olšák wrote:

From: Marek Olšák 

The update frequency is very low.

Difference: Only account for the size when allocating a new one and when
starting a new IB, and check for NULL. (v3)
---
 src/gallium/drivers/radeonsi/si_cp_dma.c|  3 ++-
 src/gallium/drivers/radeonsi/si_hw_context.c|  7 ++-
 src/gallium/drivers/radeonsi/si_pipe.h  |  2 +-
 src/gallium/drivers/radeonsi/si_state.h |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c| 24 
 src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++--
 6 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c 
b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b398256..e198765 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -276,21 +276,22 @@ static void si_cp_dma_realign_engine(struct si_context 
*sctx, unsigned size,
 * idle at this point.
 */
if (!sctx->scratch_buffer ||
sctx->scratch_buffer->b.b.width0 < scratch_size) {
r600_resource_reference(&sctx->scratch_buffer, NULL);
sctx->scratch_buffer = (struct r600_resource*)
pipe_buffer_create(&sctx->screen->b.b, 0,
   PIPE_USAGE_DEFAULT, scratch_size);
if (!sctx->scratch_buffer)
return;
-   sctx->emit_scratch_reloc = true;
+
+   si_mark_atom_dirty(sctx, &sctx->scratch_state);
}

si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
  &sctx->scratch_buffer->b.b, size, size, user_flags,
  is_first, &dma_flags);

va = sctx->scratch_buffer->gpu_address;
si_emit_cp_dma(sctx, va, va + CP_DMA_ALIGNMENT, size, dma_flags,
   R600_COHERENCY_SHADER);
 }
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index e5da730..c80b884 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -228,34 +228,39 @@ void si_begin_new_cs(struct si_context *ctx)
si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
si_all_descriptors_begin_new_cs(ctx);

ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);

+   si_mark_atom_dirty(ctx, &ctx->scratch_state);
+   if (ctx->scratch_buffer) {
+   r600_context_add_resource_size(&ctx->b.b,
+  &ctx->scratch_buffer->b.b);
+   }
+
r600_postflush_resume_features(&ctx->b);

assert(!ctx->b.gfx.cs->prev_dw);
ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;

/* Invalidate various draw states so that they are emitted before
 * the first draw call. */
si_invalidate_draw_sh_constants(ctx);
ctx->last_index_size = -1;
ctx->last_primitive_restart_en = -1;
ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
ctx->last_gs_out_prim = -1;
ctx->last_prim = -1;
ctx->last_multi_vgt_param = -1;
ctx->last_rast_prim = -1;
ctx->last_sc_line_stipple = ~0;
-   ctx->emit_scratch_reloc = true;
ctx->last_ls = NULL;
ctx->last_tcs = NULL;
ctx->last_tes_sh_base = -1;
ctx->last_num_tcs_input_cp = -1;

ctx->cs_shader_state.initialized = false;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 724d89e..d17d55a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -345,22 +345,22 @@ struct si_context {
int last_restart_index;
int last_gs_out_prim;
int last_prim;
int last_multi_vgt_param;
int last_rast_prim;
unsignedlast_sc_line_stipple;
int current_rast_prim; /* primitive type after TES, 
GS */
boolgs_tri_strip_adj_fix;

/* Scratch buffer */
+   struct r600_atomscratch_state;
struct r600_resource*scratch_buffer;
-   bool

Re: [Mesa-dev] [PATCH] r600: Fix stack overflow

2017-01-30 Thread Nicolai Hähnle

On 30.01.2017 14:07, Bartosz Tomczyk wrote:

Commit 7b5878ee0491e7a93914389a8369cd6752b9757d increased the number of
outputs to 64, but left the output array intact. This caused a stack overflow
when the number of outputs is bigger than 32. Found by ASAN.


Pushed, thanks!


---
 src/gallium/drivers/r600/r600_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index b692e7f4a1..b80a3f8b62 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2924,7 +2924,7 @@ static int r600_shader_from_tgsi(struct r600_context 
*rctx,
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
-   struct r600_bytecode_output output[32];
+   struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] gallium: set pipe_context::stream_uploader

2017-01-30 Thread Nicolai Hähnle

On 27.01.2017 12:02, Marek Olšák wrote:

From: Marek Olšák 

Notes:
- make sure the default size is large enough to handle all state trackers
- pipe wrappers don't receive transfer calls from stream_uploader, because
  pipe_context::stream_uploader points directly to the underlying driver's
  stream_uploader (to keep it simple for now)
---
 src/gallium/drivers/ddebug/dd_context.c   |  1 +
 src/gallium/drivers/etnaviv/etnaviv_context.c |  7 +++
 src/gallium/drivers/freedreno/freedreno_context.c |  8 
 src/gallium/drivers/i915/i915_context.c   |  5 +
 src/gallium/drivers/ilo/ilo_context.c |  1 +
 src/gallium/drivers/llvmpipe/lp_context.c |  8 
 src/gallium/drivers/noop/noop_pipe.c  |  5 +
 src/gallium/drivers/nouveau/nv30/nv30_context.c   | 10 ++
 src/gallium/drivers/nouveau/nv50/nv50_context.c   |  6 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c   |  5 +
 src/gallium/drivers/r300/r300_context.c   |  3 ++-
 src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
 src/gallium/drivers/rbug/rbug_context.c   |  1 +
 src/gallium/drivers/softpipe/sp_context.c |  7 +++
 src/gallium/drivers/svga/svga_context.c   |  6 ++
 src/gallium/drivers/swr/swr_context.cpp   |  8 
 src/gallium/drivers/trace/tr_context.c|  1 +
 src/gallium/drivers/vc4/vc4_context.c | 10 +-
 src/gallium/drivers/virgl/virgl_context.c |  1 +
 19 files changed, 88 insertions(+), 6 deletions(-)


[snip]

diff --git a/src/gallium/drivers/noop/noop_pipe.c 
b/src/gallium/drivers/noop/noop_pipe.c
index 3013019..6ef4f6f 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -22,20 +22,21 @@
  */
 #include 
 #include 
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_upload_mgr.h"
 #include "noop_public.h"

 DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)

 void noop_init_state_functions(struct pipe_context *ctx);

 struct noop_pipe_screen {
struct pipe_screen  pscreen;
struct pipe_screen  *oscreen;
 };
@@ -282,20 +283,23 @@ noop_flush_resource(struct pipe_context *ctx,
 static void noop_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
 {
if (fence)
   *fence = NULL;
 }

 static void noop_destroy_context(struct pipe_context *ctx)
 {
+   if (ctx->stream_uploader)
+  u_upload_destroy(ctx->stream_uploader);
+
FREE(ctx);
 }

 static boolean noop_generate_mipmap(struct pipe_context *ctx,
 struct pipe_resource *resource,
 enum pipe_format format,
 unsigned base_level,
 unsigned last_level,
 unsigned first_layer,
 unsigned last_layer)
@@ -305,20 +309,21 @@ static boolean noop_generate_mipmap(struct pipe_context 
*ctx,

 static struct pipe_context *noop_create_context(struct pipe_screen *screen,
 void *priv, unsigned flags)
 {
struct pipe_context *ctx = CALLOC_STRUCT(pipe_context);

if (!ctx)
   return NULL;
ctx->screen = screen;
ctx->priv = priv;
+   ctx->stream_uploader = u_upload_create_default(ctx);


Error handling would be nice. (The allocation of the u_upload_mgr struct 
itself can still fail.)




ctx->destroy = noop_destroy_context;
ctx->flush = noop_flush;
ctx->clear = noop_clear;
ctx->clear_render_target = noop_clear_render_target;
ctx->clear_depth_stencil = noop_clear_depth_stencil;
ctx->resource_copy_region = noop_resource_copy_region;
ctx->generate_mipmap = noop_generate_mipmap;
ctx->blit = noop_blit;
ctx->flush_resource = noop_flush_resource;
ctx->create_query = noop_create_query;

[snip]

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c 
b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index ece7da9..9d34c4d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -15,20 +15,21 @@
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */

 #include "pipe/p_defines.h"
 #include "util/u_framebuffer.h"
+#include "util/u_upload_mgr.h"

 #include "nv50/nv50_context.h"
 #include "nv50/nv50_screen.h"
 #in

Re: [Mesa-dev] [PATCH 01/10] gallium: add a common uploader to pipe_context

2017-01-30 Thread Nicolai Hähnle

On 27.01.2017 16:02, Marek Olšák wrote:

On Fri, Jan 27, 2017 at 3:38 PM, Roland Scheidegger  wrote:

Am 27.01.2017 um 12:02 schrieb Marek Olšák:

From: Marek Olšák 

For lower memory usage and more efficient updates of the buffer residency
list. (e.g. if drivers keep seeing the same buffer for many consecutive
"add" calls, the calls can be turned into no-ops trivially)
---
 src/gallium/include/pipe/p_context.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index 45098c9..5876968 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -69,33 +69,40 @@ struct pipe_stream_output_target;
 struct pipe_surface;
 struct pipe_transfer;
 struct pipe_vertex_buffer;
 struct pipe_vertex_element;
 struct pipe_video_buffer;
 struct pipe_video_codec;
 struct pipe_viewport_state;
 struct pipe_compute_state;
 union pipe_color_union;
 union pipe_query_result;
+struct u_upload_mgr;

 /**
  * Gallium rendering context.  Basically:
  *  - state setting functions
  *  - VBO drawing functions
  *  - surface functions
  */
 struct pipe_context {
struct pipe_screen *screen;

void *priv;  /**< context private data (for DRI for example) */
void *draw;  /**< private, for draw module (temporary?) */

+   /**
+* Stream uploader created by the driver. All drivers, state trackers, and
+* modules should use it.
+*/
+   struct u_upload_mgr *stream_uploader;
+
void (*destroy)( struct pipe_context * );

/**
 * VBO drawing
 */
/*@{*/
void (*draw_vbo)( struct pipe_context *pipe,
  const struct pipe_draw_info *info );
/*@}*/




I suppose this makes sense. However, this makes util interfaces
effectively part of the gallium interface, not sure how I feel about
that as this seems to violate the contract that util code is optional.


To be honest, I don't care much. Putting the uploader into
pipe_context has an obvious practical advantage and we want exactly
one instance between drivers and state trackers.

Also, all utilities that had to use pipe_buffer_create just to upload
something for one use can now use this.

If somebody wants a proper formal interface, feel free.

I do see a small decrease in CPU overhead with legacy GL apps thanks
to amdgpu_cs_add_buffer getting the same buffer over and over again.


I agree that this is a nice cleanup and improvement at the same time.

Roland has a point, of course, and in theory one could add an 
upload_alloc function to the pipe_context, but in practice, that just 
adds an indirect call for no benefit. I think we should go ahead with this.


I have some comments on patch #3, but apart from that, the series is

Reviewed-by: Nicolai Hähnle 



Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] mesa: make glFramebuffer* check immutable texture level bounds

2017-01-30 Thread Nicolai Hähnle

On 30.01.2017 18:23, Ilia Mirkin wrote:

On Mon, Jan 30, 2017 at 4:36 AM, Ilia Mirkin  wrote:

On Mon, Jan 30, 2017 at 4:33 AM, Nicolai Hähnle  wrote:

On 26.01.2017 06:47, Ilia Mirkin wrote:


When a texture is immutable, we can't tack on extra levels
after-the-fact like we could with glTexImage. So check against that
level limit and return an error if it's surpassed.

The spec is a little unclear in that it says to check if "level is not a
supported texture level", however that is never fully defined.

This fixes:
GL45-CTS.geometry_shader.layered_fbo.fb_texture_invalid_level_number

Signed-off-by: Ilia Mirkin 
---

v1 -> v2: use NumLevels instead of _MaxLevel.

Not sure why this isn't showing up as failing in the Intel CI, but it was
definitely failing here.



Maybe the Intel CI is running the GLCTS based on the last 4.5 release, and I
guess you're running off what's been published on Github? The GLCTS on
Github has a bunch of new and possibly broken tests, and may still have a
number of regressions as well (since a lot of code was moved around).

Can you point out which specific place of the spec you're talking about in
your comment?


One of the errors listed for glFramebufferTexture is:

"""
An INVALID_VALUE error is generated if texture is not zero and is not the
name of a texture object, or if level is not a supported texture level
for texture
"""


Curiously for glFramebufferTexture1D/2D/3D, it also says:

"""
If textarget is TEXTURE_RECTANGLE or TEXTURE_2D_MULTISAMPLE, then
level must be zero. If textarget is TEXTURE_3D, then level must be
greater than or equal to zero and less than or equal to log2 of the
value of MAX_3D_TEXTURE_SIZE. If textarget is one of the cube map
face targets from table 8.19, then level must be greater than or equal
to zero and less than or equal to log2 of the value of
MAX_CUBE_MAP_TEXTURE_SIZE. For all other values of textarget, level
must be greater than or equal to zero and no larger than log2 of the
value of MAX_TEXTURE_SIZE.
"""

which matches the current code. I guess this patch is withdrawn...


What a coincidence, I've been staring at this for a while now and been 
getting increasingly confused. Which function does the CTS actually test 
(and possibly fail incorrectly)? Maybe it's time to open an issue 
against the CTS.


Cheers,
Nicolai



  -ilia



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] gallium: set pipe_context::stream_uploader

2017-01-30 Thread Nicolai Hähnle

On 30.01.2017 18:20, Marek Olšák wrote:

On Mon, Jan 30, 2017 at 6:00 PM, Nicolai Hähnle  wrote:

On 27.01.2017 12:02, Marek Olšák wrote:


From: Marek Olšák 

Notes:
- make sure the default size is large enough to handle all state trackers
- pipe wrappers don't receive transfer calls from stream_uploader, because
  pipe_context::stream_uploader points directly to the underlying driver's
  stream_uploader (to keep it simple for now)
---
 src/gallium/drivers/ddebug/dd_context.c   |  1 +
 src/gallium/drivers/etnaviv/etnaviv_context.c |  7 +++
 src/gallium/drivers/freedreno/freedreno_context.c |  8 
 src/gallium/drivers/i915/i915_context.c   |  5 +
 src/gallium/drivers/ilo/ilo_context.c |  1 +
 src/gallium/drivers/llvmpipe/lp_context.c |  8 
 src/gallium/drivers/noop/noop_pipe.c  |  5 +
 src/gallium/drivers/nouveau/nv30/nv30_context.c   | 10 ++
 src/gallium/drivers/nouveau/nv50/nv50_context.c   |  6 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c   |  5 +
 src/gallium/drivers/r300/r300_context.c   |  3 ++-
 src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
 src/gallium/drivers/rbug/rbug_context.c   |  1 +
 src/gallium/drivers/softpipe/sp_context.c |  7 +++
 src/gallium/drivers/svga/svga_context.c   |  6 ++
 src/gallium/drivers/swr/swr_context.cpp   |  8 
 src/gallium/drivers/trace/tr_context.c|  1 +
 src/gallium/drivers/vc4/vc4_context.c | 10 +-
 src/gallium/drivers/virgl/virgl_context.c |  1 +
 19 files changed, 88 insertions(+), 6 deletions(-)


[snip]


diff --git a/src/gallium/drivers/noop/noop_pipe.c
b/src/gallium/drivers/noop/noop_pipe.c
index 3013019..6ef4f6f 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -22,20 +22,21 @@
  */
 #include 
 #include 
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_upload_mgr.h"
 #include "noop_public.h"

 DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)

 void noop_init_state_functions(struct pipe_context *ctx);

 struct noop_pipe_screen {
struct pipe_screen  pscreen;
struct pipe_screen  *oscreen;
 };
@@ -282,20 +283,23 @@ noop_flush_resource(struct pipe_context *ctx,
 static void noop_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
 {
if (fence)
   *fence = NULL;
 }

 static void noop_destroy_context(struct pipe_context *ctx)
 {
+   if (ctx->stream_uploader)
+  u_upload_destroy(ctx->stream_uploader);
+
FREE(ctx);
 }

 static boolean noop_generate_mipmap(struct pipe_context *ctx,
 struct pipe_resource *resource,
 enum pipe_format format,
 unsigned base_level,
 unsigned last_level,
 unsigned first_layer,
 unsigned last_layer)
@@ -305,20 +309,21 @@ static boolean noop_generate_mipmap(struct
pipe_context *ctx,

 static struct pipe_context *noop_create_context(struct pipe_screen
*screen,
 void *priv, unsigned
flags)
 {
struct pipe_context *ctx = CALLOC_STRUCT(pipe_context);

if (!ctx)
   return NULL;
ctx->screen = screen;
ctx->priv = priv;
+   ctx->stream_uploader = u_upload_create_default(ctx);



Error handling would be nice. (The allocation of the u_upload_mgr struct
itself can still fail.)



ctx->destroy = noop_destroy_context;
ctx->flush = noop_flush;
ctx->clear = noop_clear;
ctx->clear_render_target = noop_clear_render_target;
ctx->clear_depth_stencil = noop_clear_depth_stencil;
ctx->resource_copy_region = noop_resource_copy_region;
ctx->generate_mipmap = noop_generate_mipmap;
ctx->blit = noop_blit;
ctx->flush_resource = noop_flush_resource;
ctx->create_query = noop_create_query;


[snip]


diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c
b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index ece7da9..9d34c4d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -15,20 +15,21 @@
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT

Re: [Mesa-dev] [PATCH] winsys/amdgpu: add a fast exit path into amdgpu_cs_add_buffer

2017-01-30 Thread Nicolai Hähnle

On 30.01.2017 18:17, Jan Ziak wrote:

On Mon, Jan 30, 2017 at 5:08 PM, Marek Olšák  wrote:

On Mon, Jan 30, 2017 at 4:41 PM, Jan Ziak <0xe2.0x9a.0...@gmail.com> wrote:

Hello

Just a note about the future of Mesa:

It is likely that optimizing Mesa will lead you (Marek) to consider
generative programming some years into the future:

Optimal code is basically a form of efficient adaptation to current
circumstances. Generating the optimal code by hand can be
inefficient/tedious and it might be possible to generate some of it by
a program. The generator reads in C/C++/other code&data and outputs
optimized C/C++ code containing the necessary guards, data parsers,
specialized data structures, etc. The machine-generated code can be an
order of magnitude larger than the input to the generator - which is
one of the main reasons generative programming increases programmer
productivity in the long term.

The generator itself can be written in any programming language you
prefer (although personally I only recommend a compiled language with
static types).


If it were so simple, somebody would have done it already.

Marek


It is simpler, more productive and less error-prone for Mesa-like
libraries than trying to optimize critical C code&data structures by
hand.

It is impossible to improve Mesa performance to a next-generation
level without refusing some hand-written code. The human brain is
incapable of handling so many states.

If you don't try it you will never know how far it can go in terms of
optimizing Mesa.


Talk is easy, action is hard - in this case particularly, given how 
unstructured a lot of the stuff is.


Note that automatic generation is already used for the command stream 
debug dumping, and if you think you found a place where it really helps, 
feel free to show us (with patches!) how it's done :P


In the meantime, you may find better targets for such efforts in LLVM.

Cheers,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 1/2] mesa: make glFramebuffer* check immutable texture level bounds

2017-01-31 Thread Nicolai Hähnle

On 30.01.2017 19:09, Ilia Mirkin wrote:

On Mon, Jan 30, 2017 at 1:06 PM, Ilia Mirkin  wrote:

On Mon, Jan 30, 2017 at 12:26 PM, Nicolai Hähnle  wrote:

On 30.01.2017 18:23, Ilia Mirkin wrote:


On Mon, Jan 30, 2017 at 4:36 AM, Ilia Mirkin  wrote:


On Mon, Jan 30, 2017 at 4:33 AM, Nicolai Hähnle 
wrote:


On 26.01.2017 06:47, Ilia Mirkin wrote:



When a texture is immutable, we can't tack on extra levels
after-the-fact like we could with glTexImage. So check against that
level limit and return an error if it's surpassed.

The spec is a little unclear in that it says to check if "level is not
a
supported texture level", however that is never fully defined.

This fixes:
GL45-CTS.geometry_shader.layered_fbo.fb_texture_invalid_level_number

Signed-off-by: Ilia Mirkin 
---

v1 -> v2: use NumLevels instead of _MaxLevel.

Not sure why this isn't showing up as failing in the Intel CI, but it
was
definitely failing here.




Maybe the Intel CI is running the GLCTS based on the last 4.5 release,
and I
guess you're running off what's been published on Github? The GLCTS on
Github has a bunch of new and possibly broken tests, and may still have
a
number of regressions as well (since a lot of code was moved around).

Can you point out which specific place of the spec you're talking about
in
your comment?



One of the errors listed for glFramebufferTexture is:

"""
An INVALID_VALUE error is generated if texture is not zero and is not the
name of a texture object, or if level is not a supported texture level
for texture
"""



Curiously for glFramebufferTexture1D/2D/3D, it also says:

"""
If textarget is TEXTURE_RECTANGLE or TEXTURE_2D_MULTISAMPLE, then
level must be zero. If textarget is TEXTURE_3D, then level must be
greater than or equal to zero and less than or equal to log2 of the
value of MAX_3D_TEXTURE_SIZE. If textarget is one of the cube map
face targets from table 8.19, then level must be greater than or equal
to zero and less than or equal to log2 of the value of
MAX_CUBE_MAP_TEXTURE_SIZE. For all other values of textarget, level
must be greater than or equal to zero and no larger than log2 of the
value of MAX_TEXTURE_SIZE.
"""

which matches the current code. I guess this patch is withdrawn...



What a coincidence, I've been staring at this for a while now and been
getting increasingly confused. Which function does the CTS actually test
(and possibly fail incorrectly)? Maybe it's time to open an issue against
the CTS.


https://github.com/KhronosGroup/VK-GL-CTS/blob/c9921995d8d360bd34d8672194d7c095bb376f82/external/openglcts/modules/glesext/geometry_shader/esextcGeometryShaderLayeredFBO.cpp#L1062


A thought occurred to me... whereas glFramebufferTexture1D/etc talk
about binding points, glFramebufferTexture talks about a specific
texture object. When are the bindings resolved? at
glFramebufferTexture2D time, or at draw time? If the latter, then the
spec has no choice but to just check the maxima for the binding point
ones...


The problem is that in all cases, the error table lists INVALID_VALUE 
when "level is not a supported texture level for $foo", but it's not 
explicitly stated how "supported texture level" is defined for 
FramebufferTexture.


However, I did a bit more searching, and there's language below 
FramebufferTextureLayer which matches that of FramebufferTexture1D/2D/3D:


"If texture is a three-dimensional texture, then level must be greater 
than or equal to zero and less than or equal to log2 of the value of 
MAX_3D_TEXTURE_SIZE. If texture is a two-dimensional array texture, then 
level must be greater than or equal to zero and no larger than log2 of 
the value of MAX_TEXTURE_SIZE."


It would be odd to have different requirements for FramebufferTexture 
and FramebufferTextureLayer.


Furthermore, like you said, I think the bindings are supposed to be 
resolved at draw time. So if you re-define a texture with 
TexStorage1D/2D/3D, the binding survives. This matches what Mesa does 
(see the various update_fbo_texture functions).


The language in the ES spec seems to match that of GL.

I think that the test is just wrong. Can you file an issue asking about 
that?


Thanks,
Nicolai
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] gallium: set pipe_context::stream_uploader

2017-01-31 Thread Nicolai Hähnle

On 31.01.2017 02:37, Marek Olšák wrote:

On Mon, Jan 30, 2017 at 6:29 PM, Nicolai Hähnle  wrote:

On 30.01.2017 18:20, Marek Olšák wrote:


On Mon, Jan 30, 2017 at 6:00 PM, Nicolai Hähnle 
wrote:


On 27.01.2017 12:02, Marek Olšák wrote:



From: Marek Olšák 

Notes:
- make sure the default size is large enough to handle all state
trackers
- pipe wrappers don't receive transfer calls from stream_uploader,
because
  pipe_context::stream_uploader points directly to the underlying
driver's
  stream_uploader (to keep it simple for now)
---
 src/gallium/drivers/ddebug/dd_context.c   |  1 +
 src/gallium/drivers/etnaviv/etnaviv_context.c |  7 +++
 src/gallium/drivers/freedreno/freedreno_context.c |  8 
 src/gallium/drivers/i915/i915_context.c   |  5 +
 src/gallium/drivers/ilo/ilo_context.c |  1 +
 src/gallium/drivers/llvmpipe/lp_context.c |  8 
 src/gallium/drivers/noop/noop_pipe.c  |  5 +
 src/gallium/drivers/nouveau/nv30/nv30_context.c   | 10 ++
 src/gallium/drivers/nouveau/nv50/nv50_context.c   |  6 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c   |  5 +
 src/gallium/drivers/r300/r300_context.c   |  3 ++-
 src/gallium/drivers/radeon/r600_pipe_common.c |  1 +
 src/gallium/drivers/rbug/rbug_context.c   |  1 +
 src/gallium/drivers/softpipe/sp_context.c |  7 +++
 src/gallium/drivers/svga/svga_context.c   |  6 ++
 src/gallium/drivers/swr/swr_context.cpp   |  8 
 src/gallium/drivers/trace/tr_context.c|  1 +
 src/gallium/drivers/vc4/vc4_context.c | 10 +-
 src/gallium/drivers/virgl/virgl_context.c |  1 +
 19 files changed, 88 insertions(+), 6 deletions(-)


[snip]


diff --git a/src/gallium/drivers/noop/noop_pipe.c
b/src/gallium/drivers/noop/noop_pipe.c
index 3013019..6ef4f6f 100644
--- a/src/gallium/drivers/noop/noop_pipe.c
+++ b/src/gallium/drivers/noop/noop_pipe.c
@@ -22,20 +22,21 @@
  */
 #include 
 #include 
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_upload_mgr.h"
 #include "noop_public.h"

 DEBUG_GET_ONCE_BOOL_OPTION(noop, "GALLIUM_NOOP", FALSE)

 void noop_init_state_functions(struct pipe_context *ctx);

 struct noop_pipe_screen {
struct pipe_screen  pscreen;
struct pipe_screen  *oscreen;
 };
@@ -282,20 +283,23 @@ noop_flush_resource(struct pipe_context *ctx,
 static void noop_flush(struct pipe_context *ctx,
struct pipe_fence_handle **fence,
unsigned flags)
 {
if (fence)
   *fence = NULL;
 }

 static void noop_destroy_context(struct pipe_context *ctx)
 {
+   if (ctx->stream_uploader)
+  u_upload_destroy(ctx->stream_uploader);
+
FREE(ctx);
 }

 static boolean noop_generate_mipmap(struct pipe_context *ctx,
 struct pipe_resource *resource,
 enum pipe_format format,
 unsigned base_level,
 unsigned last_level,
 unsigned first_layer,
 unsigned last_layer)
@@ -305,20 +309,21 @@ static boolean noop_generate_mipmap(struct
pipe_context *ctx,

 static struct pipe_context *noop_create_context(struct pipe_screen
*screen,
 void *priv, unsigned
flags)
 {
struct pipe_context *ctx = CALLOC_STRUCT(pipe_context);

if (!ctx)
   return NULL;
ctx->screen = screen;
ctx->priv = priv;
+   ctx->stream_uploader = u_upload_create_default(ctx);




Error handling would be nice. (The allocation of the u_upload_mgr struct
itself can still fail.)



ctx->destroy = noop_destroy_context;
ctx->flush = noop_flush;
ctx->clear = noop_clear;
ctx->clear_render_target = noop_clear_render_target;
ctx->clear_depth_stencil = noop_clear_depth_stencil;
ctx->resource_copy_region = noop_resource_copy_region;
ctx->generate_mipmap = noop_generate_mipmap;
ctx->blit = noop_blit;
ctx->flush_resource = noop_flush_resource;
ctx->create_query = noop_create_query;



[snip]


diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.c
b/src/gallium/drivers/nouveau/nv50/nv50_context.c
index ece7da9..9d34c4d 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -15,20 +15,21 @@
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAM

Re: [Mesa-dev] [PATCH] winsys/radeon: Allow visible VRAM size > 256MB with kernel driver >= 2.49

2017-01-31 Thread Nicolai Hähnle

Both patches:

Reviewed-by: Nicolai Hähnle 

On 31.01.2017 07:54, Michel Dänzer wrote:

From: Michel Dänzer 

The kernel driver reports correct values now.

Signed-off-by: Michel Dänzer 
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index a8da62fd36..cacd683879 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -372,7 +372,12 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
 }
 ws->info.gart_size = gem_info.gart_size;
 ws->info.vram_size = gem_info.vram_size;
-ws->info.vram_vis_size = MIN2(gem_info.vram_visible, 256*1024*1024);
+ws->info.vram_vis_size = gem_info.vram_visible;
+/* Older versions of the kernel driver reported incorrect values, and
+ * didn't support more than 256MB of visible VRAM anyway
+ */
+if (ws->info.drm_minor < 49)
+ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024);

 /* Radeon allocates all buffers as contigous, which makes large allocations
  * unlikely to succeed. */



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glsl: fix heap-buffer-overflow

2017-01-31 Thread Nicolai Hähnle

On 31.01.2017 12:02, Bartosz Tomczyk wrote:

The `end+1` skips the ']', whereas the `strlen+1` includes the final
'\0' in the move to terminate the string.


Thanks! R-b and pushed.

Nicolai


---
 src/compiler/glsl/link_uniforms.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/link_uniforms.cpp 
b/src/compiler/glsl/link_uniforms.cpp
index a450aa03a8..4f047884e9 100644
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -535,7 +535,7 @@ private:
 const char *str_end;
 while((str_start = strchr(name_copy, '[')) &&
   (str_end = strchr(name_copy, ']'))) {
-   memmove(str_start, str_end + 1, 1 + strlen(str_end));
+   memmove(str_start, str_end + 1, 1 + strlen(str_end + 1));
 }

 unsigned index = 0;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radeonsi: Use llvm.amdgcn.s.buffer.load instead of llvm.SI.load.const

2017-02-01 Thread Nicolai Hähnle

On 31.01.2017 22:36, Tom Stellard wrote:

Advantages of using llvm.amdgcn.s.buffer.load

- We can use a real pointer type, which LLVM can better reason about and do
  alias analysis on.  This will also ease the transition to using fat pointers
  and LLVM IR loads.

- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
  query information about it other than just its attributes.
---
 src/gallium/auxiliary/gallivm/lp_bld_intr.c|  1 +
 src/gallium/auxiliary/gallivm/lp_bld_intr.h|  3 +-
 src/gallium/drivers/radeonsi/si_shader.c   | 48 +-
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  8 
 .../drivers/radeonsi/si_shader_tgsi_setup.c|  6 +++
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..dc8de55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
 {
switch (attr) {
case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
case LP_FUNC_ATTR_BYVAL: return "byval";
case LP_FUNC_ATTR_INREG: return "inreg";
case LP_FUNC_ATTR_NOALIAS: return "noalias";
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f1e075a..7c8f09b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,7 +54,8 @@ enum lp_func_attr {
LP_FUNC_ATTR_NOUNWIND = (1 << 4),
LP_FUNC_ATTR_READNONE = (1 << 5),
LP_FUNC_ATTR_READONLY = (1 << 6),
-   LP_FUNC_ATTR_LAST = (1 << 7)
+   LP_FUNC_ATTR_ARGMEMONLY   = (1 << 7),
+   LP_FUNC_ATTR_LAST = (1 << 8)
 };

 void
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a6de7c4..cf13cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct 
si_shader_context *ctx,
  */
 #define VS_EPILOG_PRIMID_LOC 2

-enum {
-   CONST_ADDR_SPACE = 2,
-   LOCAL_ADDR_SPACE = 3,
-};
-
 #define SENDMSG_GS 2
 #define SENDMSG_GS_DONE 3

@@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
struct si_shader_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index)
 {
+   LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
+   LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
+   LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+
+   /* Set !dereferenceable metadata */
+   if (elem_kind == LLVMPointerTypeKind ||
+   (elem_kind == LLVMArrayTypeKind && 
LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
+   LLVMValueRef deref_bytes, deref_md;
+   deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+   deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
+   &deref_bytes, 1);
+   LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
+   }
return result;
 }

@@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct 
si_shader_context *ctx)

 /**
  * Load a dword from a constant buffer.
+ * @param offset This is a byte offset.
+ * @returns An LLVMValueRef with f32 type.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
  LLVMValueRef resource,
  LLVMValueRef offset)
 {
LLVMBuilderRef builder = ctx->gallivm.builder;
-   LLVMValueRef args[2] = {resource, offset};
+   LLVMValueRef load;
+   LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
+   LLVMTypeRef resource_type = LLVMTypeOf(resource);
+   LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
+
+   /* XXX: We can have a non-pointer resource if we do a constant load
+ * from the RW_BUFFERS whicha are still represented using the <16 x i8>


s/whicha/which/

Also, this affects constant buffers in addition to RW buffers (at least 
for older versions of LLVM).


Apart from that and having to wait until the corresponding LLVM changes 
land, the two patches are


Reviewed-by: Nicolai Hähnle 


+ * type. We can eliminate this once we start using pointer types for
+* those buffers.
+*/
+   if (resource_kind != LLVMPointerTypeKind) {
+ 

Re: [Mesa-dev] [PATCH 3/3] st/mesa: inline get_mesa_program()

2017-02-01 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 01.02.2017 01:58, Timothy Arceri wrote:

From: Timothy Arceri 

In the past I've gotten this function confused with the one in
ir_to_mesa.cpp of the same name. Now that the affected flag setting
has moved into a helper it makes sense just to inline this remaining
code.
---
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 60 --
 1 file changed, 23 insertions(+), 37 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 823a2b4..d9a4038 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -6951,37 +6951,6 @@ set_prog_affected_state_flags(struct gl_program *prog)
}
 }

-static struct gl_program *
-get_mesa_program(struct gl_context *ctx,
- struct gl_shader_program *shader_program,
- struct gl_linked_shader *shader)
-{
-   struct pipe_screen *pscreen = ctx->st->pipe->screen;
-   enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
-   enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
-  pscreen->get_shader_param(pscreen, ptarget, 
PIPE_SHADER_CAP_PREFERRED_IR);
-   struct gl_program *prog = NULL;
-
-   if (preferred_ir == PIPE_SHADER_IR_NIR) {
-  /* TODO only for GLSL VS/FS for now: */
-  switch (shader->Stage) {
-  case MESA_SHADER_VERTEX:
-  case MESA_SHADER_FRAGMENT:
- prog = st_nir_get_mesa_program(ctx, shader_program, shader);
-  default:
- break;
-  }
-   } else {
-  prog = get_mesa_program_tgsi(ctx, shader_program, shader);
-   }
-
-   if (prog) {
-  set_prog_affected_state_flags(prog);
-   }
-
-   return prog;
-}
-
 /* See if there are unsupported control flow statements. */
 class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
 private:
@@ -7163,19 +7132,36 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
build_program_resource_list(ctx, prog);

for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-  struct gl_program *linked_prog;
-
-  if (prog->_LinkedShaders[i] == NULL)
+  struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+  if (shader == NULL)
  continue;

-  linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+  enum pipe_shader_type ptarget =
+ st_shader_stage_to_ptarget(shader->Stage);
+  enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+ pscreen->get_shader_param(pscreen, ptarget,
+   PIPE_SHADER_CAP_PREFERRED_IR);
+
+  struct gl_program *linked_prog = NULL;
+  if (preferred_ir == PIPE_SHADER_IR_NIR) {
+ /* TODO only for GLSL VS/FS for now: */
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
+ default:
+break;
+ }
+  } else {
+ linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
+  }

   if (linked_prog) {
+ set_prog_affected_state_flags(shader->Program);
  if (!ctx->Driver.ProgramStringNotify(ctx,
   _mesa_shader_stage_to_program(i),
   linked_prog)) {
-_mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
-NULL);
+_mesa_reference_program(ctx, &shader->Program, NULL);
 return GL_FALSE;
  }
   }


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi: Fix build on LLVM < 3.9 v2

2017-02-01 Thread Nicolai Hähnle

Reviewed-by: Nicolai Hähnle 

On 01.02.2017 01:22, Tom Stellard wrote:

This was broken by: e0cc0a614c96011958bc3a1b84da9168e0e1ccbb

v2:
  - Use preprocessor macro
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..c7445e0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1256,8 +1256,6 @@ void si_llvm_context_init(struct si_shader_context *ctx,
  const struct tgsi_token *tokens)
 {
struct lp_type type;
-   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);

/* Initialize the gallivm object:
 * We are only using the module, context, and builder fields of this 
struct.
@@ -1275,9 +1273,13 @@ void si_llvm_context_init(struct si_shader_context *ctx,
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

+#if HAVE_LLVM >= 0x0309
+   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
LLVMDisposeTargetData(data_layout);
LLVMDisposeMessage(data_layout_str);
+#endif

bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeonsi/ac: move frag interp emission code to shared llvm code.

2017-02-01 Thread Nicolai Hähnle

On 01.02.2017 05:48, Dave Airlie wrote:

From: Dave Airlie 

This code should be used in radv, so move it to a shared location
in advance of doing that.

Signed-off-by: Dave Airlie 


Reviewed-by: Nicolai Hähnle 


---
 src/amd/common/ac_llvm_util.c|  71 ++
 src/amd/common/ac_llvm_util.h|  14 +
 src/gallium/drivers/radeonsi/si_shader.c | 100 ---
 3 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index f3cab92..8c87a13 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -512,3 +512,74 @@ ac_dump_module(LLVMModuleRef module)
fprintf(stderr, "%s", str);
LLVMDisposeMessage(str);
 }
+
+LLVMValueRef
+ac_build_fs_interp(struct ac_llvm_context *ctx,
+  LLVMValueRef llvm_chan,
+  LLVMValueRef attr_number,
+  LLVMValueRef params,
+  LLVMValueRef i,
+  LLVMValueRef j)
+{
+   LLVMValueRef args[5];
+   LLVMValueRef p1;
+   
+   if (HAVE_LLVM < 0x0400) {
+   LLVMValueRef ij[2];
+   ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
+   ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");
+
+   args[0] = llvm_chan;
+   args[1] = attr_number;
+   args[2] = params;
+   args[3] = ac_build_gather_values(ctx, ij, 2);
+   return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp",
+ ctx->f32, args, 4,
+ AC_FUNC_ATTR_READNONE);
+   }
+
+   args[0] = i;
+   args[1] = llvm_chan;
+   args[2] = attr_number;
+   args[3] = params;
+
+   p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1",
+   ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+
+   args[0] = p1;
+   args[1] = j;
+   args[2] = llvm_chan;
+   args[3] = attr_number;
+   args[4] = params;
+
+   return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2",
+ ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
+}
+
+LLVMValueRef
+ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
+  LLVMValueRef parameter,
+  LLVMValueRef llvm_chan,
+  LLVMValueRef attr_number,
+  LLVMValueRef params)
+{
+   LLVMValueRef args[4];
+   if (HAVE_LLVM < 0x0400) {
+   args[0] = llvm_chan;
+   args[1] = attr_number;
+   args[2] = params;
+
+   return ac_emit_llvm_intrinsic(ctx,
+ "llvm.SI.fs.constant",
+ ctx->f32, args, 3,
+ AC_FUNC_ATTR_READNONE);
+   }
+
+   args[0] = parameter;
+   args[1] = llvm_chan;
+   args[2] = attr_number;
+   args[3] = params;
+
+   return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov",
+ ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
+}
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index c07f67a..2a33765 100644
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -98,6 +98,20 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 void
 ac_dump_module(LLVMModuleRef module);

+LLVMValueRef
+ac_build_fs_interp(struct ac_llvm_context *ctx,
+  LLVMValueRef llvm_chan,
+  LLVMValueRef attr_number,
+  LLVMValueRef params,
+  LLVMValueRef i,
+  LLVMValueRef j);
+
+LLVMValueRef
+ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
+  LLVMValueRef parameter,
+  LLVMValueRef llvm_chan,
+  LLVMValueRef attr_number,
+  LLVMValueRef params);
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5c5f2e6..9ae12d6 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1281,80 +1281,6 @@ static int lookup_interp_param_index(unsigned 
interpolate, unsigned location)
}
 }

-static LLVMValueRef build_fs_interp(
-   struct lp_build_tgsi_context *bld_base,
-   LLVMValueRef llvm_chan,
-   LLVMValueRef attr_number,
-   LLVMValueRef params,
-   LLVMValueRef i,
-   LLVMValueRef j) {
-
-   struct si_shader_context *ctx = si_shader_context(bld_base);
-   struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef args[5];
-   LLVMValueRef p1;
-   if (HAVE_LLVM <

Re: [Mesa-dev] [PATCH 2/2] radeonsi/ac: move tbuffer store and buffer load to shared code.

2017-02-02 Thread Nicolai Hähnle

On 02.02.2017 00:15, Dave Airlie wrote:

From: Dave Airlie 

These are all reusable by radv.

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_llvm_util.c| 154 +
 src/amd/common/ac_llvm_util.h|  44 +-
 src/gallium/drivers/radeonsi/si_shader.c | 225 ++-
 3 files changed, 237 insertions(+), 186 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 43eeaac..36ecbb1 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -35,6 +35,8 @@
 #include "util/bitscan.h"
 #include "util/macros.h"

+#include "sid.h"
+
 static void ac_init_llvm_target()
 {
 #if HAVE_LLVM < 0x0307
@@ -157,8 +159,14 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, 
LLVMContextRef context)
ctx->module = NULL;
ctx->builder = NULL;

+   ctx->voidt = LLVMVoidTypeInContext(ctx->context);
+   ctx->i1 = LLVMInt1TypeInContext(ctx->context);
+   ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+   ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+   ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+   ctx->v16i8 = LLVMVectorType(ctx->i8, 16);

ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
   
"invariant.load", 14);
@@ -648,3 +656,149 @@ ac_build_indexed_load_const(struct ac_llvm_context *ctx,
LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
return result;
 }
+
+/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by 
num_channels=1..4.
+ * The type of vdata must be one of i32 (num_channels=1), v2i32 
(num_channels=2),
+ * or v4i32 (num_channels=3,4). */


Closing */ on a line of its own, please. (This was inconsistent style in 
the original code, but let's fix it now.)




+void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+  LLVMValueRef rsrc,
+  LLVMValueRef vdata,
+  unsigned num_channels,
+  LLVMValueRef vaddr,
+  LLVMValueRef soffset,
+  unsigned inst_offset,
+  unsigned dfmt,
+  unsigned nfmt,
+  unsigned offen,
+  unsigned idxen,
+  unsigned glc,
+  unsigned slc,
+  unsigned tfe)
+{
+   LLVMValueRef args[] = {
+   rsrc,
+   vdata,
+   LLVMConstInt(ctx->i32, num_channels, 0),
+   vaddr,
+   soffset,
+   LLVMConstInt(ctx->i32, inst_offset, 0),
+   LLVMConstInt(ctx->i32, dfmt, 0),
+   LLVMConstInt(ctx->i32, nfmt, 0),
+   LLVMConstInt(ctx->i32, offen, 0),
+   LLVMConstInt(ctx->i32, idxen, 0),
+   LLVMConstInt(ctx->i32, glc, 0),
+   LLVMConstInt(ctx->i32, slc, 0),
+   LLVMConstInt(ctx->i32, tfe, 0)
+   };
+
+   /* The instruction offset field has 12 bits */
+   assert(offen || inst_offset < (1 << 12));
+
+   /* The intrinsic is overloaded, we need to add a type suffix for 
overloading to work. */
+   unsigned func = CLAMP(num_channels, 1, 3) - 1;
+   const char *types[] = {"i32", "v2i32", "v4i32"};
+   char name[256];
+   snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
+
+   ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
+  args, ARRAY_SIZE(args), 0);
+}
+
+void
+ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef vaddr,
+ LLVMValueRef soffset,
+ unsigned inst_offset)
+{
+   static unsigned dfmt[] = {
+   V_008F0C_BUF_DATA_FORMAT_32,
+   V_008F0C_BUF_DATA_FORMAT_32_32,
+   V_008F0C_BUF_DATA_FORMAT_32_32_32,
+   V_008F0C_BUF_DATA_FORMAT_32_32_32_32
+   };
+   assert(num_channels >= 1 && num_channels <= 4);
+
+   ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
+  inst_offset, dfmt[num_channels-1],


Spaces around the '-'.

With those fixed, the series is

Reviewed-by: Nicolai Hähnle 


+  V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
+}
+
+LLVMValueRef
+ac_build_buffer_load(struct ac_llvm

Re: [Mesa-dev] [PATCH 2/2] radv/ac: use shared thread id code

2017-02-02 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 02.02.2017 00:41, Dave Airlie wrote:

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 44 ++---
 1 file changed, 2 insertions(+), 42 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7a26333..45aeaf7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -135,7 +135,6 @@ struct nir_to_llvm_context {
LLVMValueRef f32one;
LLVMValueRef v4f32empty;

-   unsigned range_md_kind;
unsigned uniform_md_kind;
LLVMValueRef empty_md;
gl_shader_stage stage;
@@ -703,8 +702,6 @@ static void setup_types(struct nir_to_llvm_context *ctx)
args[3] = ctx->f32one;
ctx->v4f32empty = LLVMConstVector(args, 4);

-   ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "range", 5);
ctx->uniform_md_kind =
LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
@@ -1174,43 +1171,6 @@ static LLVMValueRef emit_unpack_half_2x16(struct 
nir_to_llvm_context *ctx,
return result;
 }

-/**
- * Set range metadata on an instruction.  This can only be used on load and
- * call instructions.  If you know an instruction can only produce the values
- * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
- * \p lo is the minimum value inclusive.
- * \p hi is the maximum value exclusive.
- */
-static void set_range_metadata(struct nir_to_llvm_context *ctx,
-  LLVMValueRef value, unsigned lo, unsigned hi)
-{
-   LLVMValueRef range_md, md_args[2];
-   LLVMTypeRef type = LLVMTypeOf(value);
-   LLVMContextRef context = LLVMGetTypeContext(type);
-
-   md_args[0] = LLVMConstInt(type, lo, false);
-   md_args[1] = LLVMConstInt(type, hi, false);
-   range_md = LLVMMDNodeInContext(context, md_args, 2);
-   LLVMSetMetadata(value, ctx->range_md_kind, range_md);
-}
-
-static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
-{
-   LLVMValueRef tid;
-   LLVMValueRef tid_args[2];
-   tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
-   tid_args[1] = ctx->i32zero;
-   tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
- "llvm.amdgcn.mbcnt.lo", ctx->i32,
- tid_args, 2, AC_FUNC_ATTR_READNONE);
-
-   tid = ac_emit_llvm_intrinsic(&ctx->ac,
- "llvm.amdgcn.mbcnt.hi", ctx->i32,
- tid_args, 2, AC_FUNC_ATTR_READNONE);
-   set_range_metadata(ctx, tid, 0, 64);
-   return tid;
-}
-
 /*
  * SI implements derivatives using the local data store (LDS)
  * All writes to the LDS happen in all executing threads at
@@ -1256,7 +1216,7 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context 
*ctx,
   LLVMArrayType(ctx->i32, 
64),
   "ddxy_lds", 
LOCAL_ADDR_SPACE);

-   thread_id = get_thread_id(ctx);
+   thread_id = ac_get_thread_id(&ctx->ac);
if (op == nir_op_fddx_fine || op == nir_op_fddx)
mask = TID_MASK_LEFT;
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -2876,7 +2836,7 @@ static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
LLVMValueRef result;
-   LLVMValueRef thread_id = get_thread_id(ctx);
+   LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
  LLVMConstInt(ctx->i32, 0xfc0, false), "");



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] radv/ac: move to using shared emit_ddxy code.

2017-02-02 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 

On 02.02.2017 00:56, Dave Airlie wrote:

From: Dave Airlie 

Signed-off-by: Dave Airlie 
---
 src/amd/common/ac_nir_to_llvm.c | 75 -
 1 file changed, 7 insertions(+), 68 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 45aeaf7..e8dc752 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1171,44 +1171,13 @@ static LLVMValueRef emit_unpack_half_2x16(struct 
nir_to_llvm_context *ctx,
return result;
 }

-/*
- * SI implements derivatives using the local data store (LDS)
- * All writes to the LDS happen in all executing threads at
- * the same time. TID is the Thread ID for the current
- * thread and is a value between 0 and 63, representing
- * the thread's position in the wavefront.
- *
- * For the pixel shader threads are grouped into quads of four pixels.
- * The TIDs of the pixels of a quad are:
- *
- *  +------+------+
- *  |4n + 0|4n + 1|
- *  +------+------+
- *  |4n + 2|4n + 3|
- *  +------+------+
- *
- * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
- * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
- * the current pixel's column, and masking with 0xfffffffe yields the TID
- * of the left pixel of the current pixel's row.
- *
- * Adding 1 yields the TID of the pixel to the right of the left pixel, and
- * adding 2 yields the TID of the pixel below the top pixel.
- */
-/* masks for thread ID. */
-#define TID_MASK_TOP_LEFT 0xfffffffc
-#define TID_MASK_TOP  0xfffffffd
-#define TID_MASK_LEFT 0xfffffffe
 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
  nir_op op,
  LLVMValueRef src0)
 {
-   LLVMValueRef tl, trbl, result;
-   LLVMValueRef tl_tid, trbl_tid;
-   LLVMValueRef args[2];
-   LLVMValueRef thread_id;
unsigned mask;
int idx;
+   LLVMValueRef result;
ctx->has_ddxy = true;

if (!ctx->lds && !ctx->has_ds_bpermute)
@@ -1216,16 +1185,13 @@ static LLVMValueRef emit_ddxy(struct 
nir_to_llvm_context *ctx,
   LLVMArrayType(ctx->i32, 
64),
   "ddxy_lds", 
LOCAL_ADDR_SPACE);

-   thread_id = ac_get_thread_id(&ctx->ac);
if (op == nir_op_fddx_fine || op == nir_op_fddx)
-   mask = TID_MASK_LEFT;
+   mask = AC_TID_MASK_LEFT;
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
-   mask = TID_MASK_TOP;
+   mask = AC_TID_MASK_TOP;
else
-   mask = TID_MASK_TOP_LEFT;
+   mask = AC_TID_MASK_TOP_LEFT;

-   tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
/* for DDX we want to next X pixel, DDY next Y pixel. */
if (op == nir_op_fddx_fine ||
op == nir_op_fddx_coarse ||
@@ -1234,36 +1200,9 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context 
*ctx,
else
idx = 2;

-   trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-   LLVMConstInt(ctx->i32, idx, false), "");
-
-   if (ctx->has_ds_bpermute) {
-   args[0] = LLVMBuildMul(ctx->builder, tl_tid,
-  LLVMConstInt(ctx->i32, 4, false), "");
-   args[1] = src0;
-   tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
-ctx->i32, args, 2,
-AC_FUNC_ATTR_READNONE);
-
-   args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
-  LLVMConstInt(ctx->i32, 4, false), "");
-   trbl = ac_emit_llvm_intrinsic(&ctx->ac, 
"llvm.amdgcn.ds.bpermute",
-  ctx->i32, args, 2,
-  AC_FUNC_ATTR_READNONE);
-   } else {
-   LLVMValueRef store_ptr, load_ptr0, load_ptr1;
-
-   store_ptr = ac_build_gep0(&ctx->ac, ctx->lds, thread_id);
-   load_ptr0 = ac_build_gep0(&ctx->ac, ctx->lds, tl_tid);
-   load_ptr1 = ac_build_gep0(&ctx->ac, ctx->lds, trbl_tid);
-
-   LLVMBuildStore(ctx->builder, src0, store_ptr);
-   tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-   trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
-   }
-   tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
-   trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f

Re: [Mesa-dev] [RFC] EGL extension for additional DRM_BUFFER_FORMATs

2017-02-02 Thread Nicolai Hähnle

Ping after a week - does anybody still want to take a look at this?

On 26.01.2017 12:50, Nicolai Hähnle wrote:

Hi all,

this is mostly motivated by the need to support more screen depths than
just 24/32-bit RGB in Glamor. The extension is simple enough, just adding
three more enums that are accepted.

I _think_ every DRI driver that exposes EGL_MESA_drm_image should be able
to support this new extension as well, since EGL_MESA_drm_image requires
DRI image version 10 to be enabled, and the corresponding
__DRI_IMAGE_FORMAT_* values have been there since DRI image version 5, but
do let me know if that thinking is wrong.

I'd like to land at least the first patch relatively quickly, to avoid future
enum clashes. Internally, we had already accidentally used an enum that is
used by one of the WL extensions, since those are not in the EGL registry...

Please review / provide feedback!

Thanks,
Nicolai


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/4] glx/dri3: guard in_current_context against a disappeared drawable

2017-02-02 Thread Nicolai Hähnle
From: Nicolai Hähnle 

Cc: 17.0 
---
 src/glx/dri3_glx.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index f7bcba3..2d40f0a 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -112,20 +112,24 @@ static void
 glx_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
int width, int height)
 {
/* Nothing to do */
 }
 
 static bool
 glx_dri3_in_current_context(struct loader_dri3_drawable *draw)
 {
struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
+
+   if (!priv)
+  return false;
+
struct dri3_context *pcp = (struct dri3_context *) __glXGetCurrentContext();
struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
 
return (&pcp->base != &dummyContext) && pcp->base.psc == &psc->base;
 }
 
 static __DRIcontext *
 glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
 {
struct glx_context *gc = __glXGetCurrentContext();
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 3/4] glx: guard swap-interval functions against destroyed drawables

2017-02-02 Thread Nicolai Hähnle
From: Nicolai Hähnle 

The GLX specification says about glXDestroyPixmap:

"The storage for the GLX pixmap will be freed when it is not current
 to any client."

So arguably, functions like glXSwapIntervalMESA can be called after
glXDestroyPixmap has been called for the currently bound GLXPixmap.
In that case, the GLXDRIdrawable no longer exists, and so we just skip
those calls.

Cc: 17.0 
---
 src/glx/dri3_glx.c |  4 
 src/glx/glxcmds.c  | 18 +++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 2d40f0a..42a94f9 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -557,20 +557,22 @@ dri3_destroy_screen(struct glx_screen *base)
free(psc);
 }
 
 /** dri3_set_swap_interval
  *
  * Record the application swap interval specification,
  */
 static int
 dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval)
 {
+   assert(pdraw != NULL);
+
struct dri3_drawable *priv =  (struct dri3_drawable *) pdraw;
GLint vblank_mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
struct dri3_screen *psc = (struct dri3_screen *) priv->base.psc;
 
if (psc->config)
   psc->config->configQueryi(psc->driScreen,
 "vblank_mode", &vblank_mode);
 
switch (vblank_mode) {
case DRI_CONF_VBLANK_NEVER:
@@ -590,20 +592,22 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int 
interval)
return 0;
 }
 
 /** dri3_get_swap_interval
  *
  * Return the stored swap interval
  */
 static int
 dri3_get_swap_interval(__GLXDRIdrawable *pdraw)
 {
+   assert(pdraw != NULL);
+
struct dri3_drawable *priv =  (struct dri3_drawable *) pdraw;
 
   return priv->swap_interval;
 }
 
 static void
 dri3_bind_tex_image(Display * dpy,
 GLXDrawable drawable,
 int buffer, const int *attrib_list)
 {
diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
index 6c7bbfd..53c9f9c 100644
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -1754,21 +1754,25 @@ __glXSwapIntervalSGI(int interval)
   return GLX_BAD_VALUE;
}
 
psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
 
 #ifdef GLX_DIRECT_RENDERING
if (gc->isDirect && psc && psc->driScreen &&
   psc->driScreen->setSwapInterval) {
   __GLXDRIdrawable *pdraw =
 GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
-  psc->driScreen->setSwapInterval(pdraw, interval);
+  /* Simply ignore the command if the GLX drawable has been destroyed but
+   * the context is still bound.
+   */
+  if (pdraw)
+ psc->driScreen->setSwapInterval(pdraw, interval);
   return 0;
}
 #endif
 
dpy = gc->currentDpy;
opcode = __glXSetupForCommand(dpy);
if (!opcode) {
   return 0;
}
 
@@ -1800,21 +1804,28 @@ __glXSwapIntervalMESA(unsigned int interval)
 #ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
 
if (gc != &dummyContext && gc->isDirect) {
   struct glx_screen *psc;
 
   psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
   if (psc && psc->driScreen && psc->driScreen->setSwapInterval) {
  __GLXDRIdrawable *pdraw =
GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
-return psc->driScreen->setSwapInterval(pdraw, interval);
+
+ /* Simply ignore the command if the GLX drawable has been destroyed 
but
+  * the context is still bound.
+  */
+ if (!pdraw)
+return 0;
+
+ return psc->driScreen->setSwapInterval(pdraw, interval);
   }
}
 #endif
 
return GLX_BAD_CONTEXT;
 }
 
 
 static int
 __glXGetSwapIntervalMESA(void)
@@ -1822,21 +1833,22 @@ __glXGetSwapIntervalMESA(void)
 #ifdef GLX_DIRECT_RENDERING
struct glx_context *gc = __glXGetCurrentContext();
 
if (gc != &dummyContext && gc->isDirect) {
   struct glx_screen *psc;
 
   psc = GetGLXScreenConfigs( gc->currentDpy, gc->screen);
   if (psc && psc->driScreen && psc->driScreen->getSwapInterval) {
  __GLXDRIdrawable *pdraw =
GetGLXDRIDrawable(gc->currentDpy, gc->currentDrawable);
-return psc->driScreen->getSwapInterval(pdraw);
+ if (pdraw)
+return psc->driScreen->getSwapInterval(pdraw);
   }
}
 #endif
 
return 0;
 }
 
 
 /*
 ** GLX_SGI_video_sync
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 0/4] glx/dri: improve dealing with the destruction of the currently bound drawable

2017-02-02 Thread Nicolai Hähnle
Hi all,

upon Emil's encouragement, I looked a bit deeper into this issue. The first
and last patch are as before, just in reverse order. I'm not completely
certain that patch #2 is needed, but it seems like a good idea. Patch #3 is
hopefully straightforward.

Please review!

Thanks,
Nicolai
--
 src/glx/dri3_glx.c | 10 ++
 src/glx/glxcmds.c  | 18 +++---
 src/mesa/drivers/dri/common/dri_util.c | 12 
 3 files changed, 37 insertions(+), 3 deletions(-)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 4/4] dri/common: clear the loaderPrivate pointer in driDestroyDrawable

2017-02-02 Thread Nicolai Hähnle
From: Nicolai Hähnle 

The GLX specification says about glXDestroyPixmap:

"The storage for the GLX pixmap will be freed when it is not current
 to any client."

We're not really following this language to the letter: some of the storage
is freed immediately (in particular, the dri3_drawable, which contains both
GLXDRIdrawable and loader_dri3_drawable). So we NULL out the pointers to
that freed storage; the previous patches added the corresponding NULL-pointer
checks.

This fixes memory corruption in piglit
./bin/glx-visuals-{depth,stencil} -pixmap -auto

Cc: 17.0 
Reviewed-by: Marek Olšák 
---
 src/mesa/drivers/dri/common/dri_util.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/common/dri_util.c 
b/src/mesa/drivers/dri/common/dri_util.c
index f92eee9..d18c458 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -638,20 +638,22 @@ static void dri_put_drawable(__DRIdrawable *pdp)
 }
 }
 
 static __DRIdrawable *
 driCreateNewDrawable(__DRIscreen *screen,
  const __DRIconfig *config,
  void *data)
 {
 __DRIdrawable *pdraw;
 
+assert(data != NULL);
+
 pdraw = malloc(sizeof *pdraw);
 if (!pdraw)
return NULL;
 
 pdraw->loaderPrivate = data;
 
 pdraw->driScreenPriv = screen;
 pdraw->driContextPriv = NULL;
 pdraw->refcount = 0;
 pdraw->lastStamp = 0;
@@ -667,20 +669,30 @@ driCreateNewDrawable(__DRIscreen *screen,
 }
 
 pdraw->dri2.stamp = pdraw->lastStamp + 1;
 
 return pdraw;
 }
 
 static void
 driDestroyDrawable(__DRIdrawable *pdp)
 {
+/*
+ * The loader's data structures are going away, even if pdp itself stays
+ * around for the time being because it is currently bound. This happens
+ * when a currently bound GLX pixmap is destroyed.
+ *
+ * Clear out the pointer back into the loader's data structures to avoid
+ * accessing an outdated pointer.
+ */
+pdp->loaderPrivate = NULL;
+
 dri_put_drawable(pdp);
 }
 
 static __DRIbuffer *
 dri2AllocateBuffer(__DRIscreen *screen,
   unsigned int attachment, unsigned int format,
   int width, int height)
 {
 return screen->driver->AllocateBuffer(screen, attachment, format,
   width, height);
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/4] glx/dri3: handle NULL pointers in loader-to-DRI3 drawable conversion

2017-02-02 Thread Nicolai Hähnle
From: Nicolai Hähnle 

With a subsequent patch, we might see NULL loaderPrivates, e.g. when
a DRIdrawable is flushed whose corresponding GLXDRIdrawable was destroyed.
This resulted in a crash, since the loader vs. DRI3 drawable structures
have a non-zero offset.

Fixes glx-visuals-{depth,stencil} -pixmap

Cc: 17.0 
Reviewed-by: Marek Olšák 
---
 src/glx/dri3_glx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glx/dri3_glx.c b/src/glx/dri3_glx.c
index 4472a0b..f7bcba3 100644
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -74,20 +74,22 @@
 #include 
 
 #include "dri_common.h"
 #include "dri3_priv.h"
 #include "loader.h"
 #include "dri2.h"
 
 static struct dri3_drawable *
 loader_drawable_to_dri3_drawable(struct loader_dri3_drawable *draw) {
size_t offset = offsetof(struct dri3_drawable, loader_drawable);
+   if (!draw)
+  return NULL;
return (struct dri3_drawable *)(((void*) draw) - offset);
 }
 
 static int
 glx_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
 {
struct dri3_drawable *priv = loader_drawable_to_dri3_drawable(draw);
 
return priv->swap_interval;
 }
-- 
2.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Revert "radeonsi: decrease the number of texture slots to 24"

2017-02-02 Thread Nicolai Hähnle

On 02.02.2017 19:46, Marek Olšák wrote:

From: Marek Olšák 

This reverts commit bdd860e3076655519d45bd66936ef7be9b7dda63.

Requested by a game developer.

Cc: 17.0 


We're really being pushed towards bindless_texture :)

Reviewed-by: Nicolai Hähnle 


---
 src/gallium/drivers/radeonsi/si_state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 6bb0673..3f08f54 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -28,21 +28,21 @@
 #define SI_STATE_H

 #include "si_pm4.h"
 #include "radeon/r600_pipe_common.h"

 #define SI_NUM_GRAPHICS_SHADERS (PIPE_SHADER_TESS_EVAL+1)
 #define SI_NUM_SHADERS (PIPE_SHADER_COMPUTE+1)

 #define SI_MAX_ATTRIBS 16
 #define SI_NUM_VERTEX_BUFFERS  SI_MAX_ATTRIBS
-#define SI_NUM_SAMPLERS24 /* OpenGL textures units per 
shader */
+#define SI_NUM_SAMPLERS32 /* OpenGL textures units per 
shader */
 #define SI_NUM_CONST_BUFFERS   16
 #define SI_NUM_IMAGES  16
 #define SI_NUM_SHADER_BUFFERS  16

 struct si_screen;
 struct si_shader;

 struct si_state_blend {
struct si_pm4_state pm4;
uint32_tcb_target_mask;



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] radeon/ac: move common llvm build functions to a separate file.

2017-02-06 Thread Nicolai Hähnle

Acked-by: Nicolai Hähnle 

On 03.02.2017 01:05, Dave Airlie wrote:

From: Dave Airlie 

Suggested by Marek.

Signed-off-by: Dave Airlie 
---
 src/amd/Makefile.sources  |   2 +
 src/amd/common/ac_llvm_build.c| 752 ++
 src/amd/common/ac_llvm_build.h| 177 +
 src/amd/common/ac_llvm_util.c | 717 +
 src/amd/common/ac_llvm_util.h | 135 
 src/amd/common/ac_nir_to_llvm.c   |   1 +
 src/gallium/drivers/radeonsi/si_shader_internal.h |   1 +
 7 files changed, 934 insertions(+), 851 deletions(-)
 create mode 100644 src/amd/common/ac_llvm_build.c
 create mode 100644 src/amd/common/ac_llvm_build.h

diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources
index d981453..7aaa90a 100644
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -29,6 +29,8 @@ ADDRLIB_FILES = \
 AMD_COMPILER_FILES = \
common/ac_binary.c \
common/ac_binary.h \
+   common/ac_llvm_build.c \
+   common/ac_llvm_build.h \
common/ac_llvm_helper.cpp \
common/ac_llvm_util.c \
common/ac_llvm_util.h
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
new file mode 100644
index 0000000..afcbf31
--- /dev/null
+++ b/src/amd/common/ac_llvm_build.c
@@ -0,0 +1,752 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
+#include "ac_llvm_build.h"
+
+#include 
+
+#include "c11/threads.h"
+
+#include 
+#include 
+
+#include "ac_llvm_util.h"
+
+#include "util/bitscan.h"
+#include "util/macros.h"
+#include "sid.h"
+
+/* Initialize module-independent parts of the context.
+ *
+ * The caller is responsible for initializing ctx::module and ctx::builder.
+ */
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+{
+   LLVMValueRef args[1];
+
+   ctx->context = context;
+   ctx->module = NULL;
+   ctx->builder = NULL;
+
+   ctx->voidt = LLVMVoidTypeInContext(ctx->context);
+   ctx->i1 = LLVMInt1TypeInContext(ctx->context);
+   ctx->i8 = LLVMInt8TypeInContext(ctx->context);
+   ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+   ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+   ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+   ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+   ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+
+   ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+"range", 5);
+
+   ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+  
"invariant.load", 14);
+
+   ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 
6);
+
+   args[0] = LLVMConstReal(ctx->f32, 2.5);
+   ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
+
+   ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+   "amdgpu.uniform", 14);
+
+   ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
+}
+
+LLVMValueRef
+ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
+  LLVMTypeRef return_type, LLVMValueRef *params,
+  unsigned param_count, unsigned attrib_mask)
+{
+   LLVMValueRef function;
+
+   function = LLVMGetNamedFunction(ctx->module, name);
+   if (!function) {
+   LLVMTypeRef param_types[32], fun

Re: [Mesa-dev] [PATCH 3/3] targets: Remove vc4 simulator hack.

2017-02-06 Thread Nicolai Hähnle

On 03.02.2017 20:13, Eric Anholt wrote:

Now that there's MESA_LOADER_DRIVER_OVERRIDE for choosing the driver name
we load, we don't need this any more.


I like the override flag, could come in handy.

For the series (including the additional hunk for patch #3):

Reviewed-by: Nicolai Hähnle 



---
 src/gallium/targets/dri/target.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index df93c94ea832..d24a61d1563d 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -73,16 +73,6 @@ DEFINE_LOADER_DRM_ENTRYPOINT(virtio_gpu)

 #if defined(GALLIUM_VC4)
 DEFINE_LOADER_DRM_ENTRYPOINT(vc4)
-
-#if defined(USE_VC4_SIMULATOR)
-/**
- * When building using the simulator (on x86), we advertise ourselves as the
- * i965 driver so that you can just make a directory with a link from
- * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
- * on your i965-using host to run the driver under simulation.
- */
-DEFINE_LOADER_DRM_ENTRYPOINT(i965)
-#endif
 #endif

 #if defined(GALLIUM_ETNAVIV)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] gallium: add separate PIPE_CAP_INT64_DIVMOD

2017-02-06 Thread Nicolai Hähnle

On 05.02.2017 19:20, Ilia Mirkin wrote:

Nouveau does not currently have logic to implement this as a library
function. Even though such a library could be written, there's no big
advantage to do it that way for now given that int64 is a very uncommon
use-case. Allow a driver to expose INT64 without supporting division and
modulo operations.


Seems reasonable. This patch is:

Reviewed-by: Nicolai Hähnle 




Signed-off-by: Ilia Mirkin 
---
 src/gallium/docs/source/screen.rst   | 2 ++
 src/gallium/drivers/etnaviv/etnaviv_screen.c | 1 +
 src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
 src/gallium/drivers/i915/i915_screen.c   | 1 +
 src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
 src/gallium/drivers/nouveau/nv30/nv30_screen.c   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c   | 1 +
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c   | 1 +
 src/gallium/drivers/r300/r300_screen.c   | 1 +
 src/gallium/drivers/r600/r600_pipe.c | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c   | 1 +
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 src/gallium/drivers/svga/svga_screen.c   | 1 +
 src/gallium/drivers/swr/swr_screen.cpp   | 1 +
 src/gallium/drivers/vc4/vc4_screen.c | 1 +
 src/gallium/drivers/virgl/virgl_screen.c | 1 +
 src/gallium/include/pipe/p_defines.h | 1 +
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   | 3 +++
 18 files changed, 21 insertions(+)

diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
index 4f5b4bb..74c8cec 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -376,6 +376,8 @@ The integer capabilities:
 * ``PIPE_CAP_DOUBLES``: Whether double precision floating-point operations
   are supported.
 * ``PIPE_CAP_INT64``: Whether 64-bit integer operations are supported.
+* ``PIPE_CAP_INT64_DIVMOD``: Whether 64-bit integer division/modulo
+  operations are supported.


 .. _pipe_capf:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 8f2882f..e3090dc 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -243,6 +243,7 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
   return 0;

/* Stream output. */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index bc8a277..1122e29 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -300,6 +300,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
return 0;

case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index 76f20fc..038cca1 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -300,6 +300,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
   return 0;

case PIPE_CAP_MAX_VIEWPORTS:
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 0b12119..76a30a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -269,6 +269,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
   return 1;

case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index b43d852..6c58a0a 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -210,6 +210,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
   return 0;

case PIPE_CAP_VENDOR_ID:
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 32c3de3..aa756ed 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -262,6 +262,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_DOUBLES:
case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
   ret

Re: [Mesa-dev] [PATCH] [RFC] radv: enable transfer queues on CIK using SDMA engine.

2017-02-06 Thread Nicolai Hähnle
It may be time to start thinking about whether CS emit functions can be 
shared between radv and the gallium winsys. This code has seen a lot of 
subtle bug fixes over time, so duplication is not very nice.


Nicolai

On 07.02.2017 07:01, Dave Airlie wrote:

From: Dave Airlie 

This enables a transfer queue using the SDMA engine on
CIK/VI/Polaris GPUs.

TODO:
decide what to do with HW limitations from radeonsi
(fail to record?)
add linear bounds check to the buffer->image copies

dEQP-VK.synchronization.op.multi_queue.fence.*:
 Passed:1294/2688 (48.1%)
 Failed:0/2688 (0.0%)
 Not supported: 1394/2688 (51.9%)
 Warnings:  0/2688 (0.0%)

Signed-off-by: Dave Airlie 
---
 src/amd/vulkan/Makefile.sources   |   1 +
 src/amd/vulkan/radv_cik_sdma.c| 747 ++
 src/amd/vulkan/radv_cmd_buffer.c  |   7 +
 src/amd/vulkan/radv_device.c  |  20 +
 src/amd/vulkan/radv_meta_buffer.c |  42 +-
 src/amd/vulkan/radv_meta_copy.c   |  19 +
 src/amd/vulkan/radv_private.h |  38 ++
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c |  12 +-
 8 files changed, 877 insertions(+), 9 deletions(-)
 create mode 100644 src/amd/vulkan/radv_cik_sdma.c

diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources
index 425a00f..6cd9621 100644
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -31,6 +31,7 @@ RADV_WS_AMDGPU_FILES := \
winsys/amdgpu/radv_amdgpu_winsys_public.h

 VULKAN_FILES := \
+   radv_cik_sdma.c \
radv_cmd_buffer.c \
radv_cs.h \
radv_device.c \
diff --git a/src/amd/vulkan/radv_cik_sdma.c b/src/amd/vulkan/radv_cik_sdma.c
new file mode 100644
index 000..1229d9c
--- /dev/null
+++ b/src/amd/vulkan/radv_cik_sdma.c
@@ -0,0 +1,747 @@
+/*
+ * Copyright © 2016 Red Hat.
+ *
+ * based on cik_sdma.c:
+ * Copyright 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "radv_private.h"
+#include "sid.h"
+#include "vk_format.h"
+#include "radv_cs.h"
+
+static VkFormat get_format_from_aspect_mask(VkImageAspectFlags aspectMask,
+   VkFormat format)
+{
+   if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+   format = vk_format_depth_only(format);
+   else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+   format = vk_format_stencil_only(format);
+   return format;
+}
+
+static unsigned minify_as_blocks(unsigned width, unsigned level, unsigned 
blk_w)
+{
+   width = radv_minify(width, level);
+   return DIV_ROUND_UP(width, blk_w);
+}
+
+static const struct radeon_surf_level *get_base_level_info(const struct 
radv_image *img,
+  VkImageAspectFlags 
aspectMask, int base_mip_level)
+{
+   if (aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
+   return &img->surface.stencil_level[base_mip_level];
+   return &img->surface.level[base_mip_level];
+}
+
+static void get_image_info(struct radv_cmd_buffer *cmd_buffer,
+  const struct radv_image *img,
+  const VkImageSubresourceLayers *subres,
+  uint64_t *va_p, uint32_t *bpp_p, uint32_t *pitch, 
uint32_t *slice_pitch)
+{
+   const struct radeon_surf_level *base_level = get_base_level_info(img, 
subres->aspectMask,
+
subres->mipLevel);
+   VkFormat format = get_format_from_aspect_mask(subres->aspectMask, 
img->vk_format);
+   uint32_t bpp = vk_format_get_blocksize(format);
+   uint64_t va = cmd_buffer->device->ws->buffer_get_va(img->bo);
+
+   va += img->offset;
+   *pitch = base_level->nblk_x;
+   *slice_pitch = base_level->slice_size / bpp;
+   if (bpp_p)
+ 

  1   2   3   4   5   6   7   8   9   10   >