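Move the streamout code into its own file, si_state_streamout.c, and
disable it for now (the code is compiled out with #if 0 and the stream
output caps report 0): I have my doubts that this code still works on SI.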

Signed-off-by: Christian König <deathsim...@vodafone.de>
---
 src/gallium/drivers/radeonsi/Makefile.sources      |    1 +
 .../drivers/radeonsi/evergreen_hw_context.c        |   39 ---
 src/gallium/drivers/radeonsi/r600.h                |    2 -
 src/gallium/drivers/radeonsi/r600_hw_context.c     |  132 +---------
 .../drivers/radeonsi/r600_hw_context_priv.h        |    7 -
 src/gallium/drivers/radeonsi/radeonsi_pipe.c       |    7 +
 src/gallium/drivers/radeonsi/si_state.c            |   68 -----
 src/gallium/drivers/radeonsi/si_state.h            |   13 +
 src/gallium/drivers/radeonsi/si_state_draw.c       |    2 +
 src/gallium/drivers/radeonsi/si_state_streamout.c  |  271 ++++++++++++++++++++
 10 files changed, 301 insertions(+), 241 deletions(-)
 create mode 100644 src/gallium/drivers/radeonsi/si_state_streamout.c

diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
index 8e27b6c..630afb8 100644
--- a/src/gallium/drivers/radeonsi/Makefile.sources
+++ b/src/gallium/drivers/radeonsi/Makefile.sources
@@ -12,4 +12,5 @@ C_SOURCES := \
        r600_state_common.c \
        radeonsi_pm4.c \
        si_state.c \
+       si_state_streamout.c \
        si_state_draw.c
diff --git a/src/gallium/drivers/radeonsi/evergreen_hw_context.c b/src/gallium/drivers/radeonsi/evergreen_hw_context.c
index d071617..56b068f 100644
--- a/src/gallium/drivers/radeonsi/evergreen_hw_context.c
+++ b/src/gallium/drivers/radeonsi/evergreen_hw_context.c
@@ -97,42 +97,3 @@ void si_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
        }
        cs->cdw += ndwords;
 }
-
-void evergreen_flush_vgt_streamout(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
-       cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) 
>> 2;
-       cs->buf[cs->cdw++] = 0;
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
-       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | 
EVENT_INDEX(0);
-
-       cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
-       cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is 
equal to the reference value */
-       cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
-       cs->buf[cs->cdw++] = 0;
-       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value 
*/
-       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
-       cs->buf[cs->cdw++] = 4; /* poll interval */
-}
-
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned 
buffer_enable_bit)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-
-       if (buffer_enable_bit) {
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
-
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = 
S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
-       } else {
-               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
-               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
-               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
-       }
-}
diff --git a/src/gallium/drivers/radeonsi/r600.h b/src/gallium/drivers/radeonsi/r600.h
index 610b9da..f34d1ff 100644
--- a/src/gallium/drivers/radeonsi/r600.h
+++ b/src/gallium/drivers/radeonsi/r600.h
@@ -126,8 +126,6 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 void r600_context_emit_fence(struct r600_context *ctx, struct si_resource 
*fence,
                              unsigned offset, unsigned value);
 
-void r600_context_streamout_begin(struct r600_context *ctx);
-void r600_context_streamout_end(struct r600_context *ctx);
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct 
r600_so_target *t);
 void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean 
count_draw_in);
 
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context.c b/src/gallium/drivers/radeonsi/r600_hw_context.c
index 509a8bf..6765ef8 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context.c
+++ b/src/gallium/drivers/radeonsi/r600_hw_context.c
@@ -182,7 +182,10 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        struct radeon_winsys_cs *cs = ctx->cs;
        struct r600_block *enable_block = NULL;
        bool queries_suspended = false;
+
+#if 0
        bool streamout_suspended = false;
+#endif
 
        if (!cs->cdw)
                return;
@@ -193,10 +196,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
                queries_suspended = true;
        }
 
+#if 0
        if (ctx->num_cs_dw_streamout_end) {
                r600_context_streamout_end(ctx);
                streamout_suspended = true;
        }
+#endif
 
        r600_flush_framebuffer(ctx, true);
 
@@ -213,10 +218,12 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        ctx->pm4_dirty_cdwords = 0;
        ctx->flags = 0;
 
+#if 0
        if (streamout_suspended) {
                ctx->streamout_start = TRUE;
                ctx->streamout_append_bitmask = ~0;
        }
+#endif
 
        /* resume queries */
        if (queries_suspended) {
@@ -638,131 +645,6 @@ void r600_context_queries_resume(struct r600_context *ctx)
        }
 }
 
-void r600_context_streamout_begin(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned *strides = ctx->vs_shader_so_strides;
-       unsigned buffer_en, i;
-
-       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
-                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
-                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
-                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
-
-       ctx->num_cs_dw_streamout_end =
-               12 + /* flush_vgt_streamout */
-               util_bitcount(buffer_en) * 8 +
-               3;
-
-       r600_need_cs_space(ctx,
-                          12 + /* flush_vgt_streamout */
-                          6 + /* enables */
-                          util_bitcount(buffer_en & 
ctx->streamout_append_bitmask) * 8 +
-                          util_bitcount(buffer_en & 
~ctx->streamout_append_bitmask) * 6 +
-                          ctx->num_cs_dw_streamout_end, TRUE);
-
-       if (ctx->chip_class >= CAYMAN) {
-               evergreen_flush_vgt_streamout(ctx);
-               evergreen_set_streamout_enable(ctx, buffer_en);
-       }
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       t[i]->stride = strides[i];
-                       t[i]->so_index = i;
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
-                       cs->buf[cs->cdw++] = 
(R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
-                                                       16*i - 
SI_CONTEXT_REG_OFFSET) >> 2;
-                       cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
-                                                       t[i]->b.buffer_size) >> 
2; /* BUFFER_SIZE (in DW) */
-                       cs->buf[cs->cdw++] = strides[i] >> 2;              /* 
VTX_STRIDE (in DW) */
-                       cs->buf[cs->cdw++] = 0;                    /* 
BUFFER_BASE */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx, 
si_resource(t[i]->b.buffer),
-                                                     RADEON_USAGE_WRITE);
-
-                       if (ctx->streamout_append_bitmask & (1 << i)) {
-                               /* Append. */
-                               cs->buf[cs->cdw++] = 
PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* src address lo */
-                               cs->buf[cs->cdw++] = 0; /* src address hi */
-
-                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                               cs->buf[cs->cdw++] =
-                                       r600_context_bo_reloc(ctx,  
t[i]->filled_size,
-                                                             
RADEON_USAGE_READ);
-                       } else {
-                               /* Start from the beginning. */
-                               cs->buf[cs->cdw++] = 
PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
-                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                              
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                               cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 
2; /* buffer offset in DW */
-                               cs->buf[cs->cdw++] = 0; /* unused */
-                       }
-               }
-#endif
-       }
-}
-
-void r600_context_streamout_end(struct r600_context *ctx)
-{
-       struct radeon_winsys_cs *cs = ctx->cs;
-       struct r600_so_target **t = ctx->so_targets;
-       unsigned i, flush_flags = 0;
-
-       evergreen_flush_vgt_streamout(ctx);
-
-       for (i = 0; i < ctx->num_so_targets; i++) {
-#if 0
-               if (t[i]) {
-                       cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 
4, 0);
-                       cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
-                                                      
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
-                                                      
STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
-                       cs->buf[cs->cdw++] = 0; /* dst address lo */
-                       cs->buf[cs->cdw++] = 0; /* dst address hi */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-                       cs->buf[cs->cdw++] = 0; /* unused */
-
-                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx,  t[i]->filled_size,
-                                                     RADEON_USAGE_WRITE);
-
-                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
-               }
-#endif
-       }
-
-       evergreen_set_streamout_enable(ctx, 0);
-
-       ctx->atom_surface_sync.flush_flags |= flush_flags;
-       r600_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
-
-       ctx->num_cs_dw_streamout_end = 0;
-
-       /* XXX print some debug info */
-       for (i = 0; i < ctx->num_so_targets; i++) {
-               if (!t[i])
-                       continue;
-
-               uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, 
ctx->cs, RADEON_USAGE_READ);
-               printf("FILLED_SIZE%i: %u\n", i, *ptr);
-               ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
-       }
-}
-
 void r600_context_draw_opaque_count(struct r600_context *ctx, struct 
r600_so_target *t)
 {
        struct radeon_winsys_cs *cs = ctx->cs;
diff --git a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
index 6d458d4..c2a15eb 100644
--- a/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
+++ b/src/gallium/drivers/radeonsi/r600_hw_context_priv.h
@@ -35,13 +35,6 @@
 #define PKT_COUNT_C                     0xC000FFFF
 #define PKT_COUNT_S(x)                  (((x) & 0x3FFF) << 16)
 
-/*
- * evergreen_hw_context.c
- */
-void evergreen_flush_vgt_streamout(struct r600_context *ctx);
-void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned 
buffer_enable_bit);
-
-
 static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct 
si_resource *rbo,
                                             enum radeon_bo_usage usage)
 {
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 3c5eaf7..ad7e595 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -340,6 +340,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
                return 0;
 
        /* Stream output. */
+#if 0
        case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
                return debug_get_bool_option("R600_STREAMOUT", FALSE) ? 4 : 0;
        case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
@@ -347,6 +348,12 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
        case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
                return 16*4;
+#endif
+       case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+       case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+               return 0;
 
        /* Texturing. */
        case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e8a7b77..1d6d214 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2205,74 +2205,6 @@ static void si_set_index_buffer(struct pipe_context *ctx,
 }
 
 /*
- * Stream out
- */
-
-static struct pipe_stream_output_target *
-si_create_so_target(struct pipe_context *ctx,
-                   struct pipe_resource *buffer,
-                   unsigned buffer_offset,
-                   unsigned buffer_size)
-{
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_so_target *t;
-       void *ptr;
-
-       t = CALLOC_STRUCT(r600_so_target);
-       if (!t) {
-               return NULL;
-       }
-
-       t->b.reference.count = 1;
-       t->b.context = ctx;
-       pipe_resource_reference(&t->b.buffer, buffer);
-       t->b.buffer_offset = buffer_offset;
-       t->b.buffer_size = buffer_size;
-
-       t->filled_size = si_resource_create_custom(ctx->screen, 
PIPE_USAGE_STATIC, 4);
-       ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, 
PIPE_TRANSFER_WRITE);
-       memset(ptr, 0, t->filled_size->buf->size);
-       rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
-       return &t->b;
-}
-
-static void si_so_target_destroy(struct pipe_context *ctx,
-                                struct pipe_stream_output_target *target)
-{
-       struct r600_so_target *t = (struct r600_so_target*)target;
-       pipe_resource_reference(&t->b.buffer, NULL);
-       si_resource_reference(&t->filled_size, NULL);
-       FREE(t);
-}
-
-static void si_set_so_targets(struct pipe_context *ctx,
-                             unsigned num_targets,
-                             struct pipe_stream_output_target **targets,
-                             unsigned append_bitmask)
-{
-       struct r600_context *rctx = (struct r600_context *)ctx;
-       unsigned i;
-
-       /* Stop streamout. */
-       if (rctx->num_so_targets) {
-               r600_context_streamout_end(rctx);
-       }
-
-       /* Set the new targets. */
-       for (i = 0; i < num_targets; i++) {
-               pipe_so_target_reference((struct 
pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
-       }
-       for (; i < rctx->num_so_targets; i++) {
-               pipe_so_target_reference((struct 
pipe_stream_output_target**)&rctx->so_targets[i], NULL);
-       }
-
-       rctx->num_so_targets = num_targets;
-       rctx->streamout_start = num_targets != 0;
-       rctx->streamout_append_bitmask = append_bitmask;
-}
-
-/*
  * Misc
  */
 static void si_set_polygon_stipple(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 6729fd4..a69722c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -132,6 +132,19 @@ bool si_is_format_supported(struct pipe_screen *screen,
 void si_init_state_functions(struct r600_context *rctx);
 void si_init_config(struct r600_context *rctx);
 
+/* si_state_streamout.c */
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+                   struct pipe_resource *buffer,
+                   unsigned buffer_offset,
+                   unsigned buffer_size);
+void si_so_target_destroy(struct pipe_context *ctx,
+                         struct pipe_stream_output_target *target);
+void si_set_so_targets(struct pipe_context *ctx,
+                      unsigned num_targets,
+                      struct pipe_stream_output_target **targets,
+                      unsigned append_bitmask);
+
 /* si_state_draw.c */
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 48a5f30..6670483 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -528,11 +528,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
        si_pm4_emit_dirty(rctx);
        rctx->pm4_dirty_cdwords = 0;
 
+#if 0
        /* Enable stream out if needed. */
        if (rctx->streamout_start) {
                r600_context_streamout_begin(rctx);
                rctx->streamout_start = FALSE;
        }
+#endif
 
        si_context_draw(rctx, &rdraw);
 
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
new file mode 100644
index 0000000..3410eb6
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Christian König <christian.koe...@amd.com>
+ */
+
+#include "radeonsi_pipe.h"
+#include "si_state.h"
+
+/*
+ * Stream out
+ */
+
+#if 0
+void si_context_streamout_begin(struct r600_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+       struct si_so_target **t = ctx->so_targets;
+       unsigned *strides = ctx->vs_shader_so_strides;
+       unsigned buffer_en, i;
+
+       buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
+                   (ctx->num_so_targets >= 2 && t[1] ? 2 : 0) |
+                   (ctx->num_so_targets >= 3 && t[2] ? 4 : 0) |
+                   (ctx->num_so_targets >= 4 && t[3] ? 8 : 0);
+
+       ctx->num_cs_dw_streamout_end =
+               12 + /* flush_vgt_streamout */
+               util_bitcount(buffer_en) * 8 +
+               3;
+
+       si_need_cs_space(ctx,
+                          12 + /* flush_vgt_streamout */
+                          6 + /* enables */
+                          util_bitcount(buffer_en & 
ctx->streamout_append_bitmask) * 8 +
+                          util_bitcount(buffer_en & 
~ctx->streamout_append_bitmask) * 6 +
+                          ctx->num_cs_dw_streamout_end, TRUE);
+
+       if (ctx->chip_class >= CAYMAN) {
+               evergreen_flush_vgt_streamout(ctx);
+               evergreen_set_streamout_enable(ctx, buffer_en);
+       }
+
+       for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+               if (t[i]) {
+                       t[i]->stride = strides[i];
+                       t[i]->so_index = i;
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 3, 0);
+                       cs->buf[cs->cdw++] = 
(R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 +
+                                                       16*i - 
SI_CONTEXT_REG_OFFSET) >> 2;
+                       cs->buf[cs->cdw++] = (t[i]->b.buffer_offset +
+                                                       t[i]->b.buffer_size) >> 
2; /* BUFFER_SIZE (in DW) */
+                       cs->buf[cs->cdw++] = strides[i] >> 2;              /* 
VTX_STRIDE (in DW) */
+                       cs->buf[cs->cdw++] = 0;                    /* 
BUFFER_BASE */
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                       cs->buf[cs->cdw++] =
+                               si_context_bo_reloc(ctx, 
si_resource(t[i]->b.buffer),
+                                                     RADEON_USAGE_WRITE);
+
+                       if (ctx->streamout_append_bitmask & (1 << i)) {
+                               /* Append. */
+                               cs->buf[cs->cdw++] = 
PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                              
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM); /* control */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* src address lo */
+                               cs->buf[cs->cdw++] = 0; /* src address hi */
+
+                               cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                               cs->buf[cs->cdw++] =
+                                       si_context_bo_reloc(ctx,  
t[i]->filled_size,
+                                                             
RADEON_USAGE_READ);
+                       } else {
+                               /* Start from the beginning. */
+                               cs->buf[cs->cdw++] = 
PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
+                               cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                              
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET); /* control */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                               cs->buf[cs->cdw++] = t[i]->b.buffer_offset >> 
2; /* buffer offset in DW */
+                               cs->buf[cs->cdw++] = 0; /* unused */
+                       }
+               }
+#endif
+       }
+}
+
+void si_context_streamout_end(struct r600_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+       struct si_so_target **t = ctx->so_targets;
+       unsigned i, flush_flags = 0;
+
+       evergreen_flush_vgt_streamout(ctx);
+
+       for (i = 0; i < ctx->num_so_targets; i++) {
+#if 0
+               if (t[i]) {
+                       cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 
4, 0);
+                       cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
+                                                      
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+                                                      
STRMOUT_STORE_BUFFER_FILLED_SIZE; /* control */
+                       cs->buf[cs->cdw++] = 0; /* dst address lo */
+                       cs->buf[cs->cdw++] = 0; /* dst address hi */
+                       cs->buf[cs->cdw++] = 0; /* unused */
+                       cs->buf[cs->cdw++] = 0; /* unused */
+
+                       cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
+                       cs->buf[cs->cdw++] =
+                               si_context_bo_reloc(ctx,  t[i]->filled_size,
+                                                     RADEON_USAGE_WRITE);
+
+                       flush_flags |= S_0085F0_SO0_DEST_BASE_ENA(1) << i;
+               }
+#endif
+       }
+
+       evergreen_set_streamout_enable(ctx, 0);
+
+       ctx->atom_surface_sync.flush_flags |= flush_flags;
+       si_atom_dirty(ctx, &ctx->atom_surface_sync.atom);
+
+       ctx->num_cs_dw_streamout_end = 0;
+
+       /* XXX print some debug info */
+       for (i = 0; i < ctx->num_so_targets; i++) {
+               if (!t[i])
+                       continue;
+
+               uint32_t *ptr = ctx->ws->buffer_map(t[i]->filled_size->cs_buf, 
ctx->cs, RADEON_USAGE_READ);
+               printf("FILLED_SIZE%i: %u\n", i, *ptr);
+               ctx->ws->buffer_unmap(t[i]->filled_size->cs_buf);
+       }
+}
+
+void evergreen_flush_vgt_streamout(struct si_context *ctx)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONFIG_REG, 1, 0);
+       cs->buf[cs->cdw++] = (R_0084FC_CP_STRMOUT_CNTL - SI_CONFIG_REG_OFFSET) 
>> 2;
+       cs->buf[cs->cdw++] = 0;
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
+       cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | 
EVENT_INDEX(0);
+
+       cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
+       cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is 
equal to the reference value */
+       cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2;  /* register */
+       cs->buf[cs->cdw++] = 0;
+       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value 
*/
+       cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
+       cs->buf[cs->cdw++] = 4; /* poll interval */
+}
+
+void evergreen_set_streamout_enable(struct si_context *ctx, unsigned 
buffer_enable_bit)
+{
+       struct radeon_winsys_cs *cs = ctx->cs;
+
+       if (buffer_enable_bit) {
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(1);
+
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = 
S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit);
+       } else {
+               cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
+               cs->buf[cs->cdw++] = (R_028B94_VGT_STRMOUT_CONFIG - 
SI_CONTEXT_REG_OFFSET) >> 2;
+               cs->buf[cs->cdw++] = S_028B94_STREAMOUT_0_EN(0);
+       }
+}
+
+#endif
+
+struct pipe_stream_output_target *
+si_create_so_target(struct pipe_context *ctx,
+                   struct pipe_resource *buffer,
+                   unsigned buffer_offset,
+                   unsigned buffer_size)
+{
+#if 0
+       struct si_context *rctx = (struct r600_context *)ctx;
+       struct si_so_target *t;
+       void *ptr;
+
+       t = CALLOC_STRUCT(si_so_target);
+       if (!t) {
+               return NULL;
+       }
+
+       t->b.reference.count = 1;
+       t->b.context = ctx;
+       pipe_resource_reference(&t->b.buffer, buffer);
+       t->b.buffer_offset = buffer_offset;
+       t->b.buffer_size = buffer_size;
+
+       t->filled_size = si_resource_create_custom(ctx->screen, 
PIPE_USAGE_STATIC, 4);
+       ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, 
PIPE_TRANSFER_WRITE);
+       memset(ptr, 0, t->filled_size->buf->size);
+       rctx->ws->buffer_unmap(t->filled_size->cs_buf);
+
+       return &t->b;
+#endif
+       return NULL;
+}
+
+void si_so_target_destroy(struct pipe_context *ctx,
+                         struct pipe_stream_output_target *target)
+{
+#if 0
+       struct si_so_target *t = (struct r600_so_target*)target;
+       pipe_resource_reference(&t->b.buffer, NULL);
+       si_resource_reference(&t->filled_size, NULL);
+       FREE(t);
+#endif
+}
+
+void si_set_so_targets(struct pipe_context *ctx,
+                      unsigned num_targets,
+                      struct pipe_stream_output_target **targets,
+                      unsigned append_bitmask)
+{
+       assert(num_targets == 0);
+#if 0
+       struct si_context *rctx = (struct r600_context *)ctx;
+       unsigned i;
+
+       /* Stop streamout. */
+       if (rctx->num_so_targets) {
+               si_context_streamout_end(rctx);
+       }
+
+       /* Set the new targets. */
+       for (i = 0; i < num_targets; i++) {
+               pipe_so_target_reference((struct 
pipe_stream_output_target**)&rctx->so_targets[i], targets[i]);
+       }
+       for (; i < rctx->num_so_targets; i++) {
+               pipe_so_target_reference((struct 
pipe_stream_output_target**)&rctx->so_targets[i], NULL);
+       }
+
+       rctx->num_so_targets = num_targets;
+       rctx->streamout_start = num_targets != 0;
+       rctx->streamout_append_bitmask = append_bitmask;
+#endif
+}
-- 
1.7.9.5

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to