autogen.sh | 4 configure.ac | 4 man/radeon.man | 25 + src/Makefile.am | 1 src/ati_pciids_gen.h | 59 +++ src/cayman_shader.c | 590 ++++++++++++++++++++++++------------- src/drmmode_display.c | 280 +++++++++++++++-- src/drmmode_display.h | 11 src/evergreen_accel.c | 12 src/evergreen_exa.c | 287 +++++++++++++----- src/evergreen_shader.c | 596 ++++++++++++++++++++++++-------------- src/evergreen_state.h | 2 src/pcidb/ati_pciids.csv | 107 +++++- src/r600_exa.c | 14 src/radeon.h | 32 +- src/radeon_bo_helper.c | 70 ++++ src/radeon_bo_helper.h | 7 src/radeon_chipinfo_gen.h | 107 +++++- src/radeon_chipset_gen.h | 59 +++ src/radeon_dri2.c | 286 +++++++++++++++--- src/radeon_driver.c | 100 ------ src/radeon_exa.c | 65 ---- src/radeon_exa_funcs.c | 3 src/radeon_exa_render.c | 12 src/radeon_glamor.c | 81 ++++- src/radeon_kms.c | 85 ++++- src/radeon_pci_chipset_gen.h | 59 +++ src/radeon_pci_device_match_gen.h | 59 +++ src/radeon_probe.h | 7 src/radeon_video.c | 6 src/radeon_video.h | 1 31 files changed, 2223 insertions(+), 808 deletions(-)
New commits: commit 9c97cca5c24409ca8447c99f051a12fd2d494e79 Author: Maarten Lankhorst <maarten.lankho...@canonical.com> Date: Wed Aug 7 10:48:17 2013 +0200 radeon: bump version for release Signed-off-by: Maarten Lankhorst <maarten.lankho...@canonical.com> diff --git a/configure.ac b/configure.ac index ed04028..ac202e1 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-ati], - [7.1.99], + [7.2.0], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-ati]) commit 16270cfb202ab67dd152644ef019b2f1ee4d0341 Author: Maarten Lankhorst <maarten.lankho...@canonical.com> Date: Wed Aug 7 10:29:33 2013 +0200 add bicubic_table.py to EXTRA_DIST Signed-off-by: Maarten Lankhorst <maarten.lankho...@canonical.com> diff --git a/src/Makefile.am b/src/Makefile.am index 6b7171e..e23dc1d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -86,6 +86,7 @@ EXTRA_DIST = \ ati.h \ ativersion.h \ bicubic_table.h \ + bicubic_table.py \ radeon_bo_helper.h \ radeon_drm.h \ radeon_exa_render.c \ commit 2cb9197ca7a337c911f38b5de562a2364b922b86 Author: Maarten Lankhorst <maarten.lankho...@canonical.com> Date: Wed Aug 7 10:28:52 2013 +0200 kill unused radeon_driver.c Signed-off-by: Maarten Lankhorst <maarten.lankho...@canonical.com> diff --git a/src/radeon.h b/src/radeon.h index 912e24d..4660893 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -497,9 +497,6 @@ extern void RADEONWaitForVLine(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); -/* radeon_driver.c */ -extern RADEONEntPtr RADEONEntPriv(ScrnInfoPtr pScrn); - /* radeon_exa.c */ extern unsigned eg_tile_split(unsigned tile_split); extern Bool radeon_transform_is_affine_or_scaled(PictTransformPtr t); @@ -528,6 +525,7 @@ extern void radeon_ddx_cs_start(ScrnInfoPtr pScrn, int num, const char *file, const char *func, int line); void radeon_kms_update_vram_limit(ScrnInfoPtr pScrn, int new_fb_size); +extern RADEONEntPtr RADEONEntPriv(ScrnInfoPtr pScrn); drmVBlankSeqType radeon_populate_vbl_request_type(xf86CrtcPtr crtc); diff --git a/src/radeon_driver.c b/src/radeon_driver.c deleted file mode 100644 index 2f085a8..0000000 --- a/src/radeon_driver.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and - * VA Linux Systems Inc., Fremont, California. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation on the rights to use, copy, modify, merge, - * publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, - * subject to the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR - * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -/* - * Authors: - * Kevin E. Martin <mar...@xfree86.org> - * Rickard E. Faith <fa...@valinux.com> - * Alan Hourihane <al...@fairlite.demon.co.uk> - * - * Credits: - * - * Thanks to Ani Joshi <ajo...@shell.unixbox.com> for providing source - * code to his Radeon driver. Portions of this file are based on the - * initialization code for that driver. - * - * References: - * - * !!!! FIXME !!!! - * RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical - * Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April - * 1999. - * - * RAGE 128 Software Development Manual (Technical Reference Manual P/N - * SDK-G04000 Rev. 0.01), ATI Technologies: June 1999. - * - * This server does not yet support these XFree86 4.0 features: - * !!!! FIXME !!!! - * DDC1 & DDC2 - * shadowfb - * overlay planes - * - * Modified by Marc Aurele La France (t...@xfree86.org) for ATI driver merge. - * - * Mergedfb and pseudo xinerama support added by Alex Deucher (ag...@yahoo.com) - * based on the sis driver by Thomas Winischhofer. - * - */ - -#include <string.h> -#include <stdio.h> - - /* Driver data structures */ -#include "radeon.h" -#include "radeon_reg.h" -#include "radeon_probe.h" -#include "radeon_version.h" - -#include "fb.h" - - /* colormap initialization */ -#include "micmap.h" -#include "dixstruct.h" - - /* X and server generic header files */ -#include "xf86.h" -#include "xf86_OSproc.h" -#include "xf86RandR12.h" -#include "xf86cmap.h" - -#include "shadow.h" - /* vgaHW definitions */ -#ifdef HAVE_XEXTPROTO_71 -#include <X11/extensions/dpmsconst.h> -#else -#define DPMS_SERVER -#include <X11/extensions/dpms.h> -#endif - - -#include "atipciids.h" - commit c5cbfcf575b0b4aea6f797558ae974c1453c8e07 Author: Alex Deucher <alexander.deuc...@amd.com> Date: Tue Jul 30 10:08:25 2013 -0400 drmmode: add support for multi-screen reverse optimus Initial reverse optimus didn't consider multiple screens, so this overhauls the code to use the new X server interface, and allows for multiple outputs on the dGPU to be used with the iGPU doing the rendering. Ported from Dave's nouveau patch. Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 244a98f..1df104d 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -521,14 +521,15 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, } crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green, crtc->gamma_blue, crtc->gamma_size); - + drmmode_ConvertToKMode(crtc->scrn, &kmode, mode); fb_id = drmmode->fb_id; #ifdef RADEON_PIXMAP_SHARING - if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap) - x = y = 0; - else + if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap) { + x = drmmode_crtc->scanout_pixmap_x; + y = 0; + } else #endif if (drmmode_crtc->rotate_fb_id) { fb_id = drmmode_crtc->rotate_fb_id; @@ -741,25 +742,58 @@ drmmode_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) { ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); PixmapPtr screenpix = screen->GetScreenPixmap(screen); + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private; + int c, total_width = 0, max_height = 0, this_x = 0; if (!ppix) { if (crtc->randr_crtc->scanout_pixmap) PixmapStopDirtyTracking(crtc->randr_crtc->scanout_pixmap, screenpix); + drmmode_crtc->scanout_pixmap_x = 0; return TRUE; } - if (ppix->drawable.width > screenpix->drawable.width || - ppix->drawable.height > screenpix->drawable.height) { + /* iterate over all the attached crtcs - + work out bounding box */ + for (c = 0; c < xf86_config->num_crtc; c++) { + xf86CrtcPtr iter = xf86_config->crtc[c]; + if (!iter->enabled && iter != crtc) + continue; + if (iter == crtc) { + this_x = total_width; + total_width += ppix->drawable.width; + if (max_height < ppix->drawable.height) + max_height = ppix->drawable.height; + } else { + total_width += iter->mode.HDisplay; + if (max_height < iter->mode.VDisplay) + max_height = iter->mode.VDisplay; + } +#ifndef HAS_DIRTYTRACKING2 + if (iter != crtc) { + ErrorF("Cannot do multiple crtcs without X server dirty tracking 2 interface\n"); + return FALSE; + } +#endif + } + + if (total_width != screenpix->drawable.width || + max_height != screenpix->drawable.height) { Bool ret; - ret = drmmode_xf86crtc_resize(crtc->scrn, ppix->drawable.width, ppix->drawable.height); + ret = drmmode_xf86crtc_resize(crtc->scrn, total_width, max_height); if (ret == FALSE) return FALSE; screenpix = screen->GetScreenPixmap(screen); - screen->width = screenpix->drawable.width = ppix->drawable.width; - screen->height = screenpix->drawable.height = ppix->drawable.height; + screen->width = screenpix->drawable.width = total_width; + screen->height = screenpix->drawable.height = max_height; } + drmmode_crtc->scanout_pixmap_x = this_x; +#ifdef HAS_DIRTYTRACKING2 + PixmapStartDirtyTracking2(ppix, screenpix, 0, 0, this_x, 0); +#else PixmapStartDirtyTracking(ppix, screenpix, 0, 0); +#endif return TRUE; } #endif diff --git a/src/drmmode_display.h b/src/drmmode_display.h index 2fccfda..41e29f6 100644 --- a/src/drmmode_display.h +++ b/src/drmmode_display.h @@ -81,6 +81,7 @@ typedef struct { int dpms_last_fps; uint32_t interpolated_vblanks; uint16_t lut_r[256], lut_g[256], lut_b[256]; + int scanout_pixmap_x; } drmmode_crtc_private_rec, *drmmode_crtc_private_ptr; typedef struct { commit 429d5b797769895eb4f5fef816ce4e2f3a342031 Author: Dave Airlie <airl...@redhat.com> Date: Tue Jan 8 15:56:37 2013 +1000 radeon: add support for reverse prime (v2) This adds support for reverse prime configurations v2: fix compilation with older xservers Signed-off-by: Alex Deucher <alexdeuc...@gmail.com> diff --git a/src/drmmode_display.c b/src/drmmode_display.c index a614216..244a98f 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -52,6 +52,9 @@ #define DEFAULT_NOMINAL_FRAME_RATE 60 static Bool +drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height); + +static Bool RADEONZaphodStringMatches(ScrnInfoPtr pScrn, const char *s, char *output_name) { int i = 0; @@ -522,6 +525,11 @@ drmmode_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, drmmode_ConvertToKMode(crtc->scrn, &kmode, mode); fb_id = drmmode->fb_id; +#ifdef RADEON_PIXMAP_SHARING + if (crtc->randr_crtc && crtc->randr_crtc->scanout_pixmap) + x = y = 0; + else +#endif if (drmmode_crtc->rotate_fb_id) { fb_id = drmmode_crtc->rotate_fb_id; x = y = 0; @@ -727,6 +735,35 @@ drmmode_crtc_gamma_set(xf86CrtcPtr crtc, uint16_t *red, uint16_t *green, size, red, green, blue); } +#ifdef RADEON_PIXMAP_SHARING +static Bool +drmmode_set_scanout_pixmap(xf86CrtcPtr crtc, PixmapPtr ppix) +{ + ScreenPtr screen = xf86ScrnToScreen(crtc->scrn); + PixmapPtr screenpix = screen->GetScreenPixmap(screen); + + if (!ppix) { + if (crtc->randr_crtc->scanout_pixmap) + PixmapStopDirtyTracking(crtc->randr_crtc->scanout_pixmap, screenpix); + return TRUE; + } + + if (ppix->drawable.width > screenpix->drawable.width || + ppix->drawable.height > screenpix->drawable.height) { + Bool ret; + ret = drmmode_xf86crtc_resize(crtc->scrn, ppix->drawable.width, ppix->drawable.height); + if (ret == FALSE) + return FALSE; + + screenpix = screen->GetScreenPixmap(screen); + screen->width = screenpix->drawable.width = ppix->drawable.width; + screen->height = screenpix->drawable.height = ppix->drawable.height; + } + PixmapStartDirtyTracking(ppix, screenpix, 0, 0); + return TRUE; +} +#endif + static const xf86CrtcFuncsRec drmmode_crtc_funcs = { .dpms = drmmode_crtc_dpms, .set_mode_major = drmmode_set_mode_major, @@ -741,6 +778,9 @@ static const xf86CrtcFuncsRec drmmode_crtc_funcs = { .shadow_allocate = drmmode_crtc_shadow_allocate, .shadow_destroy = drmmode_crtc_shadow_destroy, .destroy = NULL, /* XXX */ +#ifdef RADEON_PIXMAP_SHARING + .set_scanout_pixmap = drmmode_set_scanout_pixmap, +#endif }; int drmmode_get_crtc_id(xf86CrtcPtr crtc) diff --git a/src/radeon_kms.c b/src/radeon_kms.c index c3f50d5..edc3b04 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -257,7 +257,7 @@ redisplay_dirty(ScreenPtr screen, PixmapDirtyUpdatePtr dirty) ScrnInfoPtr pScrn = xf86ScreenToScrn(screen); RegionRec pixregion; - PixmapRegionInit(&pixregion, dirty->slave_dst->master_pixmap); + PixmapRegionInit(&pixregion, dirty->slave_dst); DamageRegionAppend(&dirty->slave_dst->drawable, &pixregion); PixmapSyncDirtyHelper(dirty, &pixregion); @@ -766,7 +766,7 @@ static void RADEONSetupCapabilities(ScrnInfoPtr pScrn) if (value & DRM_PRIME_CAP_EXPORT) pScrn->capabilities |= RR_Capability_SourceOutput | RR_Capability_SinkOffload; if (value & DRM_PRIME_CAP_IMPORT) - pScrn->capabilities |= RR_Capability_SourceOffload; + pScrn->capabilities |= RR_Capability_SourceOffload | RR_Capability_SinkOutput; } #endif } commit 4de9356a2900ae0fb380a2350791ef045629cd05 Author: Alex Deucher <alexander.deuc...@amd.com> Date: Mon Aug 5 17:57:16 2013 -0400 radeon: fix naming clashes with multiple GPUs (v3) The compat naming code for UMS causes problems with multiple GPU as you may end up with the same output name on multiple GPUs. Adjust the naming on secondary GPUs to avoid conflicts. v2: integrate Dave's fixes for nouveau v3: keep compat with existing naming on primary GPU Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/src/drmmode_display.c b/src/drmmode_display.c index 3a0187e..a614216 100644 --- a/src/drmmode_display.c +++ b/src/drmmode_display.c @@ -1108,6 +1108,8 @@ const char *output_names[] = { "None", "eDP" }; +#define NUM_OUTPUT_NAMES (sizeof(output_names) / sizeof(output_names[0])) + static void drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int *num_dvi, int *num_hdmi) { @@ -1137,30 +1139,43 @@ drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int *num_dv } } - /* need to do smart conversion here for compat with non-kms ATI driver */ - if (koutput->connector_type_id == 1) { - switch(koutput->connector_type) { - case DRM_MODE_CONNECTOR_DVII: - case DRM_MODE_CONNECTOR_DVID: - case DRM_MODE_CONNECTOR_DVIA: - snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_dvi); - (*num_dvi)++; - break; - case DRM_MODE_CONNECTOR_HDMIA: - case DRM_MODE_CONNECTOR_HDMIB: - snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_hdmi); - (*num_hdmi)++; - break; - case DRM_MODE_CONNECTOR_VGA: - case DRM_MODE_CONNECTOR_DisplayPort: - snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1); - break; - default: - snprintf(name, 32, "%s", output_names[koutput->connector_type]); - break; - } - } else { - snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], koutput->connector_type_id - 1); + if (koutput->connector_type >= NUM_OUTPUT_NAMES) + snprintf(name, 32, "Unknown%d-%d", koutput->connector_type, + koutput->connector_type_id - 1); +#ifdef RADEON_PIXMAP_SHARING + else if (pScrn->is_gpu) + snprintf(name, 32, "%s-%d-%d", + output_names[koutput->connector_type], pScrn->scrnIndex - GPU_SCREEN_OFFSET + 1, + koutput->connector_type_id - 1); +#endif + else { + /* need to do smart conversion here for compat with non-kms ATI driver */ + if (koutput->connector_type_id == 1) { + switch(koutput->connector_type) { + case DRM_MODE_CONNECTOR_DVII: + case DRM_MODE_CONNECTOR_DVID: + case DRM_MODE_CONNECTOR_DVIA: + snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_dvi); + (*num_dvi)++; + break; + case DRM_MODE_CONNECTOR_HDMIA: + case DRM_MODE_CONNECTOR_HDMIB: + snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], *num_hdmi); + (*num_hdmi)++; + break; + case DRM_MODE_CONNECTOR_VGA: + case DRM_MODE_CONNECTOR_DisplayPort: + snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], + koutput->connector_type_id - 1); + break; + default: + snprintf(name, 32, "%s", output_names[koutput->connector_type]); + break; + } + } else { + snprintf(name, 32, "%s-%d", output_names[koutput->connector_type], + koutput->connector_type_id - 1); + } } if (xf86IsEntityShared(pScrn->entityList[0])) { commit 2ae6bb18fefddb309920fa69c9b56c3a7f3db7b4 Author: Grigori Goronzy <g...@chown.ath.cx> Date: Wed Jul 31 12:01:20 2013 +0200 EXA/evergreen/ni: replace magic number Signed-off-by: Alex Deucher <alexdeuc...@gmail.com> diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c index ee5b06b..ccd102d 100644 --- a/src/evergreen_exa.c +++ b/src/evergreen_exa.c @@ -1111,7 +1111,7 @@ static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture, if (EVERGREENBlendOp[op].src_alpha && (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != (BLEND_ZERO << COLOR_SRCBLEND_shift)) { - if (pSrcPicture->pDrawable || op != 3) + if (pSrcPicture->pDrawable || op != PictOpOver) RADEON_FALLBACK(("Component alpha not supported with source " "alpha and source value blending.\n")); } commit 6a278369c05a298a4367306d986467a9ceacae8c Author: Raul Fernandes <rgfernan...@gmail.com> Date: Tue Jul 30 09:26:05 2013 -0400 EXA/6xx/7xx: optimize non-overlapping Copy In case dst and src rectangles of a Copy operation in the same surface don't overlap, it is safe to skip the scratch surface. This is a common case. Based on evergreen/ni patch from Grigori Goronzy. Signed-off-by: Alex Deucher <alexander.deuc...@amd.com> diff --git a/src/r600_exa.c b/src/r600_exa.c index b243234..a354ccd 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -643,7 +643,12 @@ R600Copy(PixmapPtr pDst, if (accel_state->vsync) RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); - if (accel_state->same_surface && accel_state->copy_area) { + if (accel_state->same_surface && + (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { + R600DoPrepareCopy(pScrn); + R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); + R600DoCopyVline(pDst); + } else if (accel_state->same_surface && accel_state->copy_area) { uint32_t orig_dst_domain = accel_state->dst_obj.domain; uint32_t orig_src_domain = accel_state->src_obj[0].domain; uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; commit 4375a6e75e5d41139be7031a0dee58c057ecbd07 Author: Grigori Goronzy <g...@chown.ath.cx> Date: Mon Jul 22 02:30:28 2013 +0200 EXA/evergreen/ni: accelerate PictOpOver with component alpha Subpixel text rendering is typically done with a solid src and a pixmap mask. Traditionally, this cannot be accelerated in a single pass and requires two passes [1]. However, we can cheat a little with a constant blend color. We can use: const.A = src.A / src.A const.R = src.R / src.A const.G = src.G / src.A const.B = src.B / src.A dst.A = const.A * (src.A * mask.A) + (1 - (src.A * mask.A)) * dst.A dst.R = const.R * (src.A * mask.R) + (1 - (src.A * mask.R)) * dst.R dst.G = const.G * (src.A * mask.G) + (1 - (src.A * mask.G)) * dst.G dst.B = const.B * (src.A * mask.B) + (1 - (src.A * mask.B)) * dst.B This only needs a single source value. src.A is cancelled down in the right places. [1] http://anholt.livejournal.com/32058.html diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c index 10f2e51..e25010b 100644 --- a/src/evergreen_accel.c +++ b/src/evergreen_accel.c @@ -335,7 +335,19 @@ evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t do (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); END_BATCH(); +} +void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(2 + 4); + PACK0(CB_BLEND_RED, 4); + EFLOAT(color[0]); /* R */ + EFLOAT(color[1]); /* G */ + EFLOAT(color[2]); /* B */ + EFLOAT(color[3]); /* A */ + END_BATCH(); } static void diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c index 5b8a631..ee5b06b 100644 --- a/src/evergreen_exa.c +++ b/src/evergreen_exa.c @@ -704,6 +704,14 @@ static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_for } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); } + + /* With some tricks, we can still accelerate PictOpOver with solid src. + * This is commonly used for text rendering, so it's worth the extra + * effort. + */ + if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) { + sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift); + } } return sblend | dblend; @@ -1095,12 +1103,17 @@ static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture, /* Check if it's component alpha that relies on a source alpha and * on the source value. We can only get one of those into the * single source value that we get to blend with. + * + * We can cheat a bit if the src is solid, though. PictOpOver + * can use the constant blend color to sneak a second blend + * source in. */ if (EVERGREENBlendOp[op].src_alpha && (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != (BLEND_ZERO << COLOR_SRCBLEND_shift)) { - RADEON_FALLBACK(("Component alpha not supported with source " - "alpha and source value blending.\n")); + if (pSrcPicture->pDrawable || op != 3) + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha and source value blending.\n")); } } @@ -1196,6 +1209,11 @@ static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, u } else { if (accel_state->component_alpha) { if (accel_state->src_alpha) { + /* required for PictOpOver */ + float cblend[4] = { pix_r / pix_a, pix_g / pix_a, + pix_b / pix_a, pix_a / pix_a }; + evergreen_set_blend_color(pScrn, cblend); + if (PICT_FORMAT_A(format) == 0) { pix_r = 1.0; pix_g = 1.0; diff --git a/src/evergreen_state.h b/src/evergreen_state.h index 3ce2bf2..795d447 100644 --- a/src/evergreen_state.h +++ b/src/evergreen_state.h @@ -297,6 +297,8 @@ evergreen_start_3d(ScrnInfoPtr pScrn); void evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain); void +evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color); +void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); void evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp); commit 94d0d14914a025525a0766669b556eaa6681def7 Author: Grigori Goronzy <g...@chown.ath.cx> Date: Thu Jul 18 16:06:23 2013 +0200 EXA/evergreen/ni: fast solid pixmap support Solid pixmaps are currently implemented with scratch pixmaps, which is slow. This replaces the hack with a proper implementation. The Composite shader can now either sample a src/mask or use a constant value. diff --git a/src/cayman_shader.c b/src/cayman_shader.c index 2a6d6b1..59f4177 100644 --- a/src/cayman_shader.c +++ b/src/cayman_shader.c @@ -2495,17 +2495,44 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3), + /* call interp-fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(12), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); + /* 1 */ - shader[i++] = CF_DWORD0(ADDR(8), + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(15), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(1), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 2 */ + /* call interp-fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(7), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(10), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2514,7 +2541,41 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); - /* 2 - end */ + /* 4 */ + /* src IN mask (GPR2 := GPR1 .* GPR0) */ + shader[i++] = CF_ALU_DWORD0(ADDR(17), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 */ + /* export pixel data */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 6 */ + /* end of program */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2524,33 +2585,53 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_END), BARRIER(1)); - /* 3 - mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(12), + + /* subroutine interp-fetch-src */ + + /* 7 */ + /* interpolate src */ + shader[i++] = CF_ALU_DWORD0(ADDR(21), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(8), + I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = CF_DWORD0(ADDR(28), + /* 8 */ + /* texture fetch src into GPR0 */ + shader[i++] = CF_DWORD0(ADDR(26), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + I_COUNT(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 5 */ - shader[i++] = CF_ALU_DWORD0(ADDR(20), + /* 9 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); + + /* subroutine read-constant-src */ + + /* 10 */ + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(28), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); @@ -2558,29 +2639,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), - ALT_CONST(0), + ALT_CONST(1), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 6 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); - /* 7 */ + /* 11 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2589,10 +2654,13 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); - /* 8 - non-mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(24), + /* subroutine interp-fetch-mask */ + + /* 12 */ + /* interpolate mask */ + shader[i++] = CF_ALU_DWORD0(ADDR(32), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2604,8 +2672,10 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 9 */ - shader[i++] = CF_DWORD0(ADDR(32), + + /* 13 */ + /* texture fetch mask into GPR1 */ + shader[i++] = CF_DWORD0(ADDR(36), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2615,24 +2685,37 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 10 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); + /* 14 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); - /* 11 */ + /* subroutine read-constant-src */ + + /* 15 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(38), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(1), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 16 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2641,18 +2724,21 @@ int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); + + /* ALU clauses */ -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1v79kn-0001b8...@vasks.debian.org