from:"Tom Stellard"

[Mesa-dev] [PATCH] radeon/llvm: Handle LLVM backend rename from R600 to AMDGPU

2015-06-11 Thread Tom Stellard

---
 configure.ac  | 13 -
 src/gallium/drivers/radeon/radeon_llvm_emit.c |  8 
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index d32aa24..eda8d23 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2048,16 +2048,19 @@ require_egl_drm() {
 }
 
 radeon_llvm_check() {
+if test ${LLVM_VERSION_INT} -lt 307; then
+amdgpu_llvm_target_name='r600'
+else
+amdgpu_llvm_target_name='amdgpu'
+fi
 if test "x$enable_gallium_llvm" != "xyes"; then
 AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
 fi
 llvm_check_version_for "3" "4" "2" $1 
-if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
-AC_MSG_ERROR([LLVM R600 Target not enabled.  You can enable it when 
building the LLVM
-  sources with the --enable-experimental-targets=R600
-  configure flag])
+if test true && $LLVM_CONFIG --targets-built | grep -iqvw 
$amdgpu_llvm_target_name ; then
+AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM 
build.])
 fi
-LLVM_COMPONENTS="${LLVM_COMPONENTS} r600 bitreader ipo"
+LLVM_COMPONENTS="${LLVM_COMPONENTS} $amdgpu_llvm_target_name bitreader ipo"
 NEED_RADEON_LLVM=yes
 if test "x$have_libelf" != xyes; then
AC_MSG_ERROR([$1 requires libelf when using llvm])
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 624077c..25580b6 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -86,10 +86,18 @@ static void init_r600_target()
 {
static unsigned initialized = 0;
if (!initialized) {
+#if HAVE_LLVM < 0x0307
LLVMInitializeR600TargetInfo();
LLVMInitializeR600Target();
LLVMInitializeR600TargetMC();
LLVMInitializeR600AsmPrinter();
+#else
+   LLVMInitializeAMDGPUTargetInfo();
+   LLVMInitializeAMDGPUTarget();
+   LLVMInitializeAMDGPUTargetMC();
+   LLVMInitializeAMDGPUAsmPrinter();
+
+#endif
initialized = 1;
}
 }
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] configure.ac: rename LLVM_VERSION_PATCH to avoid conflict with llvm-config.h

2015-06-15 Thread Tom Stellard

On Sat, Jun 13, 2015 at 08:16:23PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> ---
>  configure.ac | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 34d1ac9..e6d947e 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1929,7 +1929,7 @@ if test "x$enable_gallium_llvm" = xyes; then
>  LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker 
> instrumentation"
>  LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option objcarcopts 
> profiledata"
>  fi
> -DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
> +DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
>  MESA_LLVM=1
>  
>  dnl Check for Clang internal headers
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH shader-db] si-report: Fix algorithm for determining difference in shader stats

2015-06-15 Thread Tom Stellard

---
 si-report.py | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/si-report.py b/si-report.py
index fbeed11..ec88112 100755
--- a/si-report.py
+++ b/si-report.py
@@ -145,7 +145,13 @@ class si_stats:
 self.__dict__[name] += 1
 
 def is_empty(self):
-return sum(map(lambda x : x[0] if type(x) == tuple else x, 
self.__dict__.values())) == 0
+for name in self.__dict__.keys():
+x = self.__dict__[name]
+if type(x) == tuple and x[0] is not 0:
+return False
+if type(x) != tuple and x is not 0:
+return False
+return True
 
 def get_results(filename):
 file = open(filename, "r")
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] configure.ac: rename LLVM_VERSION_PATCH to avoid conflict with llvm-config.h

2015-06-16 Thread Tom Stellard

On Tue, Jun 16, 2015 at 08:07:57PM +0100, Emil Velikov wrote:
> On 13 June 2015 at 19:16, Marek Olšák  wrote:
> > From: Marek Olšák 
> >
> > ---
> >  configure.ac | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/configure.ac b/configure.ac
> > index 34d1ac9..e6d947e 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -1929,7 +1929,7 @@ if test "x$enable_gallium_llvm" = xyes; then
> >  LLVM_COMPONENTS="${LLVM_COMPONENTS} all-targets ipo linker 
> > instrumentation"
> >  LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader option 
> > objcarcopts profiledata"
> >  fi
> > -DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> > -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
> > +DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT 
> > -DMESA_LLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
> 
> Silly questions:
> If LLVM already sets LLVM_VERSION_PATCH shouldn't we be using it,
> rather than setting our own ? Perhaps we can drop the define
> altogether, considering that we're not using it ?
> 

Depending on the version of LLVM and the build system that was used,
LLVM_VERSION_PATCH may or may not be defined, so we can't rely on it.

-Tom

> Cheers,
> Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] abundance of branches in mesa.git

2015-06-22 Thread Tom Stellard

On Mon, Jun 22, 2015 at 12:23:54PM +0200, Marek Olšák wrote:
> On Mon, Jun 22, 2015 at 5:36 AM, Ilia Mirkin  wrote:
> > On Sun, Jun 21, 2015 at 11:33 PM, Michel Dänzer  wrote:
> >> On 22.06.2015 00:31, Ilia Mirkin wrote:
> >>> On Sun, Jun 21, 2015 at 12:22 PM, Emil Velikov  
> >>> wrote:
>  On 20/06/15 10:01, Eirik Byrkjeflot Anonsen wrote:
> > Ilia Mirkin  writes:
> >
> >> Hello,
> >>
> >> There are a *ton* of branches in the upstream mesa git. Here is a full 
> >> list:
> >>
> > [...]
> >> is there
> >> any reason to keep these around with the exception of:
> >>
> >> master
> >> $version (i.e. 9.0, 10.0, mesa_7_7_branch, etc)
> >
> > Instead of outright deleting old branches, it would be possible to set
> > up an "archive" repository which mirrors all branches of the main
> > repository. And then delete "obsolete" branches only from the main
> > repository. Ideally, you would want a git hook to refuse to create a new
> > branch (in the main repository) if a branch by that name already exists
> > in the archive repository. Possibly with the exception that creating a
> > same-named branch on the same commit would be allowed.
> >
> > (And the same for tags, of course)
> >
>  Personally I am fine with either approach - stay/nuke/move. But I'm
>  thinking that having a mix of the two suggestions might be a nice middle
>  ground.
> 
>  Write a script that nukes branches that are merged in master (check the
>  top commit of the branch) and have an 'archive' repo that contains
>  everything else (minus the stable branches).
> >>
> >> Sounds good to me, FWIW.
> >>
> >>
> >>> That still leaves a ton around, and curiously removes mesa_7_5 and 
> >>> mesa_7_6.
> >>
> >> I think the latter is expected, we were using a different branching
> >> model back in those days.
> >>
> >>
> >>>origin/amdgpu
> >>
> >> Note that this is a currently active branch, to be merged to master soon.
> >
> > Perhaps there's something I don't understand, but why is a feature
> > branch made available on the shared tree? In my view of things the
> > only branches on the shared mesa.git tree should be the version
> > branches.
> 
> As you can see, a lot of feature branches are in the shared tree
> already, so there is a precedent. Sharing a branch among people in
> this way sometimes tends to be more convenient.
> 
> The reason here is that it's the only mesa repository where most
> people from our team have commit access.
> 

Also, the shared git tree supports https access, which means it is
accessible when behind a firewall.

-Tom

> Marek
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC shader-db] Add support for shadertoy tests

2015-06-23 Thread Tom Stellard

On Tue, Jun 23, 2015 at 08:18:45PM -0400, Rob Clark wrote:
> On Tue, Jun 23, 2015 at 7:27 PM, Dylan Baker  wrote:
> > I have a couple of python pointers for you, feel free to take them or
> > leave them.
> 
> cool, thanks..
> 
> What do others think about including shadertoy in shader-db?  If it is
> a useful thing, I'll clean up my script and re-submit..
> 
> And if we include it, should we commit the scripts we pull down for
> repeatable results and just keep the script as something to resync and
> pull in new shaders?  (Esp. given that a small percentage need some
> hand massaging.. I haven't figured out a good way to
> reliably/programmatically figure out if the samplers should actually be
> 2d/3d/cube..)
> 

I'm in favor of including shadertoy shaders.

-Tom

> That said, I think the shaderdb shaders are a fairly, umm, unique
> stress test of a shader compiler.. and the API to scrape shaders seems
> too convenient to ignore..
> 
> BR,
> -R
> 
> > Dylan
> >
> > On Tue, Jun 16, 2015 at 03:46:50PM -0400, Rob Clark wrote:
> >> Attached script grabs shaders from shadertoy, and dumps them out as
> >> .shader_test files which can be run through shader-db for compiler
> >> testing.
> >>
> >> shadertoy only gives you a fragment shader (which works based on
> >> gl_FragCoord), so a generic vertex shader is used.  And a blurb is
> >> inserted for the pre-defined uniforms and main() function (which just
> >> calls shadertoy mainImage() fxn).
> >>
> >> ---
> >> TODO I guess we'd actually have to parse the shader to figure out if
> >> the sampler uniforms were meant to be 2D/cube/etc.  Maybe we just
> >> commit samplers we get from the script and massage them by hand?
> >>
> >> PS. don't make fun of my py too much.. I'm a newb and figuring it
> >> out as I go
> >
> > I'm trying not to make fun, but I do have quite a few pointers for you.
> >
> >>
> >>  grab-shadertoy.py | 63 
> >> +++
> >>  1 file changed, 63 insertions(+)
> >>  create mode 100755 grab-shadertoy.py
> >>
> >> diff --git a/grab-shadertoy.py b/grab-shadertoy.py
> >> new file mode 100755
> >> index 000..74e9d10
> >> --- /dev/null
> >> +++ b/grab-shadertoy.py
> >> @@ -0,0 +1,63 @@
> >> +#!/usr/bin/env python3
> >> +
> >> +
> >> +import requests, json
> >
> > You're not actually using json
> >
> >> +
> >> +url = 'https://www.shadertoy.com/api/v1/shaders'
> >> +key = '?key=NdnKw7'
> >> +
> >> +# Get the list of shaders
> >> +r = requests.get(url + key)
> >> +j = r.json()
> >> +print('Found ' + str(j['Shaders']) + ' shaders')
> >
> > If you use format you can avoid calling str() on everything, and make
> > things more readable using format rather than concatenation:
> > print('Found {} shaders'.format(j['Shaders']))
> >
> >> +
> >> +shader_ids = j['Results']
> >> +for id in shader_ids:
> >> +print('Fetching shader: ' + str(id))
> >> +r = requests.get(url + '/' + id + key)
> >> +j = r.json()
> >> +s = j['Shader']
> >> +info = s['info']
> >> +print('Name: ' + info['name'])
> >> +print('Description: ' + info['description'])
> >> +i = 0;
> >
> > python has a cool builtin called enumerate for doing this:
> > for i, p in enumerate(s['renderpass']):
> >
> > Also, I know it's easy to forget, but python doesn't use ';' at the end
> > of lines, it allows them, but they look weird to pythonistas
> >
> >> +for p in s['renderpass']:
> >> +fobj = open('shaders/shadertoy/' + str(id) + '_' + str(i) + 
> >> '.shader_test', 'w')
> >
> > with str.format this would look like:
> > with open('shaders/shadertoy/{}_{}.shader_test'.format(id, i), 'w') as fobj:
> >
> >> +#print('Inputs: ' + str(p['inputs']))
> >> +#print('Outputs: ' + str(p['outputs']))
> >> +fobj.write('[require]\n')
> >> +fobj.write('GLSL >= 1.30\n')
> >> +fobj.write('\n');
> >> +fobj.write('[fragment shader]\n')
> >> +fobj.write('#version 130\n')
> >> +# Shadertoy inserts some uniforms, so we need to do the same:
> >> +fobj.write('uniform vec3  iResolution;\n');
> >> +fobj.write('uniform float iGlobalTime;\n');
> >> +fobj.write('uniform float iChannelTime[4];\n');
> >> +fobj.write('uniform vec4  iMouse;\n');
> >> +fobj.write('uniform vec4  iDate;\n');
> >> +fobj.write('uniform float iSampleRate;\n');
> >> +fobj.write('uniform vec3  iChannelResolution[4];\n');
> >> +# TODO probably need to parse the shader to figure out if 
> >> 2d/cubemap/etc
> >> +fobj.write('uniform sampler2D iChannel0;\n');
> >> +fobj.write('uniform sampler2D iChannel1;\n');
> >> +fobj.write('uniform sampler2D iChannel2;\n');
> >> +fobj.write('uniform sampler2D iChannel3;\n');
> >> +# Actual shadertoy shader body:
> >> +fobj.write(p['code'])
> >> +# Shadertoy shader uses mainImage(out vec4 fragColor, in vec2 
> >> fragCoord)
> >> +

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/6] clover: Call clBuildProgram() notification function when build completes v2

2015-06-25 Thread Tom Stellard

On Thu, Jun 25, 2015 at 03:19:40PM +0100, Emil Velikov wrote:
> On 3 June 2015 at 14:27, Francisco Jerez  wrote:
> > Emil Velikov  writes:
> >
> >> Hi Tom,
> >>
> >> On 31 March 2015 at 15:29, Francisco Jerez  wrote:
> >>> Tom Stellard  writes:
> >>>
> >>>> v2:
> >>>>   - Only call notification for build errors
> >>>>   - Fix clCompileProgram()
> >>>>
> >>>> Cc: 10.5 10.4 
> >>>> ---
> >>>>  src/gallium/state_trackers/clover/api/program.cpp | 16 ++--
> >>>>  1 file changed, 14 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
> >>>> b/src/gallium/state_trackers/clover/api/program.cpp
> >>>> index 60184ed..5cd543c 100644
> >>>> --- a/src/gallium/state_trackers/clover/api/program.cpp
> >>>> +++ b/src/gallium/state_trackers/clover/api/program.cpp
> >>>> @@ -180,12 +180,18 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
> >>>> validate_build_program_common(prog, num_devs, d_devs, pfn_notify, 
> >>>> user_data);
> >>>>
> >>>> prog.build(devs, opts);
> >>>> +   if (pfn_notify)
> >>>> +  pfn_notify(d_prog, user_data);
> >>>
> >>> Maybe leave blank lines around the conditional where preceded/followed
> >>> by another statement?
> >>>
> >>>> return CL_SUCCESS;
> >>>> +} catch (const build_error &e) {
> >>>> +   if (pfn_notify)
> >>>> +  pfn_notify(d_prog, user_data);
> >>>> +   if (e.get() == CL_COMPILE_PROGRAM_FAILURE)
> >>>> +  return CL_BUILD_PROGRAM_FAILURE;
> >>>> +   return e.get();
> >>>
> >>> You can just return CL_BUILD_PROGRAM_FAILURE here unconditionally.
> >>>
> >>>>  } catch (error &e) {
> >>>> if (e.get() == CL_INVALID_COMPILER_OPTIONS)
> >>>>return CL_INVALID_BUILD_OPTIONS;
> >>>> -   if (e.get() == CL_COMPILE_PROGRAM_FAILURE)
> >>>> -  return CL_BUILD_PROGRAM_FAILURE;
> >>>> return e.get();
> >>>>  }
> >>>>
> >>>> @@ -223,8 +229,14 @@ clCompileProgram(cl_program d_prog, cl_uint 
> >>>> num_devs,
> >>>>objs(d_header_progs, num_headers));
> >>>>
> >>>> prog.build(devs, opts, headers);
> >>>> +   if (pfn_notify)
> >>>> +  pfn_notify(d_prog, user_data);
> >>>
> >>> Same here.
> >>>
> >> Did you have the chance to address Francisco's comments? Did this
> >> patch fall through the cracks, or should I consider it
> >> obsolete/rejected?
> >>
> >
> > I think the patch was good it just fell through the cracks.  Tom?
> >
> Humble reminder.

Sorry, I haven't had a chance to update this patch, you can ignore it.

-Tom
> 
> -Emil
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/8] Render node only opencl and pipe-loader cleanups

2015-07-07 Thread Tom Stellard

On Tue, Jul 07, 2015 at 05:43:19PM +0100, Emil Velikov wrote:
> On 30/06/15 16:09, Emil Velikov wrote:
> > Hello all,
> > 
> > As mentioned over IRC a few weeks back, here is a series that removes 
> > support for non-render node devices.
> > 
> > The two main motivations being:
> >  - Currently we force X/xcb onto everyone that wants to use OpenCL
> > (headless OpenCL systems/farms anyone ?)

Is this really true?  I don't see where lack of xcb prevents users from
building OpenCL.

-Tom

> >  - Nice overall cleanup - 43 insertions(+), 279 deletions(-)
> > 
> > 
> > Note that the final patches touch related code - from removing a unused 
> > function (pipe_loader_sw_probe_xlib) to using loader_open_device() over 
> > open(), with the former caring about CLOEXEC.
> >
> Francisco, Tom,
> 
> Can you guys please take a look at the series. Even an Ack would be
> greatly appreciated.
> 

I have no problems with merging these.

-Tom
> Thanks
> Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] clover: Fix bug with computing hard_event status

2015-07-10 Thread Tom Stellard

pipe_context::flush() can return a NULL fence if the queue is already
empty, so we should not assume that an event with a NULL fence
has the status of CL_QUEUED.

CC: 10.6 
---
 src/gallium/state_trackers/clover/core/event.cpp | 7 ---
 src/gallium/state_trackers/clover/core/event.hpp | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
b/src/gallium/state_trackers/clover/core/event.cpp
index d75b839..b973c78 100644
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -118,7 +118,7 @@ event::wait() const {
 hard_event::hard_event(command_queue &q, cl_command_type command,
const ref_vector &deps, action action) :
event(q.context(), deps, profile(q, action), [](event &ev){}),
-   _queue(q), _command(command), _fence(NULL) {
+   _queue(q), _command(command), _fence(NULL), _fenced(false) {
if (q.profiling_enabled())
   _time_queued = timestamp::current(q);
 
@@ -138,7 +138,7 @@ hard_event::status() const {
if (event::status() < 0)
   return event::status();
 
-   else if (!_fence)
+   else if (!_fenced)
   return CL_QUEUED;
 
else if (!screen->fence_finish(screen, _fence, 0))
@@ -167,7 +167,7 @@ hard_event::wait() const {
if (status() == CL_QUEUED)
   queue()->flush();
 
-   if (!_fence ||
+   if (!_fenced ||
!screen->fence_finish(screen, _fence, PIPE_TIMEOUT_INFINITE))
   throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
 }
@@ -196,6 +196,7 @@ void
 hard_event::fence(pipe_fence_handle *fence) {
pipe_screen *screen = queue()->device().pipe;
screen->fence_reference(screen, &_fence, fence);
+   _fenced = true;
 }
 
 event::action
diff --git a/src/gallium/state_trackers/clover/core/event.hpp 
b/src/gallium/state_trackers/clover/core/event.hpp
index 6469e48..fac62d2 100644
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -137,6 +137,7 @@ namespace clover {
   const intrusive_ref _queue;
   cl_command_type _command;
   pipe_fence_handle *_fence;
+  bool _fenced;
   lazy _time_queued, _time_submit, _time_start, _time_end;
};
 
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] gallium/drivers: Add threadsafe wrappers for pipe_context v2

2015-07-10 Thread Tom Stellard

These wrappers can be used by state trackers to ensure threadsafe access
to pipe_context objects.

v2:
  - Don't add wrappers for pipe_screen.
  - Build system cleanups

CC: 10.6 
---
 configure.ac   |   1 +
 src/gallium/Makefile.am|   1 +
 src/gallium/SConscript |   1 +
 src/gallium/drivers/threadsafe/Makefile.am |  11 +
 src/gallium/drivers/threadsafe/Makefile.sources|   3 +
 src/gallium/drivers/threadsafe/SConscript  |  13 +
 src/gallium/drivers/threadsafe/threadsafe.h|  37 +++
 .../drivers/threadsafe/threadsafe_context.c| 276 +
 8 files changed, 343 insertions(+)
 create mode 100644 src/gallium/drivers/threadsafe/Makefile.am
 create mode 100644 src/gallium/drivers/threadsafe/Makefile.sources
 create mode 100644 src/gallium/drivers/threadsafe/SConscript
 create mode 100644 src/gallium/drivers/threadsafe/threadsafe.h
 create mode 100644 src/gallium/drivers/threadsafe/threadsafe_context.c

diff --git a/configure.ac b/configure.ac
index d819bef..6f93a2e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2358,6 +2358,7 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/rbug/Makefile
src/gallium/drivers/softpipe/Makefile
src/gallium/drivers/svga/Makefile
+   src/gallium/drivers/threadsafe/Makefile
src/gallium/drivers/trace/Makefile
src/gallium/drivers/vc4/Makefile
src/gallium/state_trackers/clover/Makefile
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index ede6e21..2290583 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -12,6 +12,7 @@ SUBDIRS += auxiliary
 
 SUBDIRS += \
drivers/noop \
+   drivers/threadsafe \
drivers/trace \
drivers/rbug
 
diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index eeb1c78..e6070b6 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -17,6 +17,7 @@ SConscript([
 'drivers/softpipe/SConscript',
 'drivers/svga/SConscript',
 'drivers/trace/SConscript',
+'drivers/threadsafe/SConscript',
 ])
 
 #
diff --git a/src/gallium/drivers/threadsafe/Makefile.am 
b/src/gallium/drivers/threadsafe/Makefile.am
new file mode 100644
index 000..bab64bf
--- /dev/null
+++ b/src/gallium/drivers/threadsafe/Makefile.am
@@ -0,0 +1,11 @@
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+   $(GALLIUM_DRIVER_CFLAGS)
+
+noinst_LTLIBRARIES = libthreadsafe.la
+
+libthreadsafe_la_SOURCES = $(C_SOURCES)
+
+EXTRA_DIST = SConscript
diff --git a/src/gallium/drivers/threadsafe/Makefile.sources 
b/src/gallium/drivers/threadsafe/Makefile.sources
new file mode 100644
index 000..5e8778e
--- /dev/null
+++ b/src/gallium/drivers/threadsafe/Makefile.sources
@@ -0,0 +1,3 @@
+C_SOURCES := \
+   threadsafe.h \
+   threadsafe_context.c
diff --git a/src/gallium/drivers/threadsafe/SConscript 
b/src/gallium/drivers/threadsafe/SConscript
new file mode 100644
index 000..bce
--- /dev/null
+++ b/src/gallium/drivers/threadsafe/SConscript
@@ -0,0 +1,13 @@
+###
+# SConscript for noop convenience library
+
+Import('*')
+
+env = env.Clone()
+
+threadsafe = env.ConvenienceLibrary(
+   target = 'threadsafe',
+   source = env.ParseSourceList('Makefile.sources', 'C_SOURCES')
+   ) + extra
+
+Export('threadsafe')
diff --git a/src/gallium/drivers/threadsafe/threadsafe.h 
b/src/gallium/drivers/threadsafe/threadsafe.h
new file mode 100644
index 000..60614ed
--- /dev/null
+++ b/src/gallium/drivers/threadsafe/threadsafe.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF

[Mesa-dev] [PATCH 2/2] clover: Use threadsafe wrappers for pipe_context v2

2015-07-10 Thread Tom Stellard

Events can be added to an OpenCL command queue concurrently from multiple
threads, but pipe_context objects are not threadsafe.  The threadsafe
wrappers protect all pipe_context function calls with a mutex, so we
can safely use them with multiple threads.

v2:
  - Don't use wrapper for pipe_screen.

CC: 10.6 
---
 src/gallium/state_trackers/clover/core/queue.cpp | 2 ++
 src/gallium/targets/opencl/Makefile.am   | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/core/queue.cpp 
b/src/gallium/state_trackers/clover/core/queue.cpp
index 87f9dcc..16b089c 100644
--- a/src/gallium/state_trackers/clover/core/queue.cpp
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -24,6 +24,7 @@
 #include "core/event.hpp"
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
+#include "threadsafe/threadsafe.h"
 
 using namespace clover;
 
@@ -33,6 +34,7 @@ command_queue::command_queue(clover::context &ctx, 
clover::device &dev,
pipe = dev.pipe->context_create(dev.pipe, NULL);
if (!pipe)
   throw error(CL_INVALID_DEVICE);
+   pipe = pipe_threadsafe_context(pipe);
 }
 
 command_queue::~command_queue() {
diff --git a/src/gallium/targets/opencl/Makefile.am 
b/src/gallium/targets/opencl/Makefile.am
index 70e60e2..be5a59d 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -16,6 +16,7 @@ endif
 
 lib@OPENCL_LIBNAME@_la_LIBADD = \

$(top_builddir)/src/gallium/auxiliary/pipe-loader/libpipe_loader_client.la \
+   $(top_builddir)/src/gallium/drivers/threadsafe/libthreadsafe.la \
$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
@@ -36,7 +37,8 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
-lclangEdit \
-lclangLex \
-lclangBasic \
-   $(LLVM_LIBS)
+   $(LLVM_LIBS) \
+   $(PTHREAD_LIBS)
 
 nodist_EXTRA_lib@OPENCL_LIBNAME@_la_SOURCES = dummy.cpp
 lib@OPENCL_LIBNAME@_la_SOURCES =
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/6] radeonsi: remove redundant parameter in si_shader_binary_read

2015-07-16 Thread Tom Stellard

On Thu, Jul 16, 2015 at 06:55:26PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> ---
>  src/gallium/drivers/radeonsi/si_compute.c | 2 +-
>  src/gallium/drivers/radeonsi/si_shader.c  | 8 +++-
>  src/gallium/drivers/radeonsi/si_shader.h  | 3 +--
>  3 files changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
> b/src/gallium/drivers/radeonsi/si_compute.c
> index 89bef2e..0361c99 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -144,7 +144,7 @@ static void *si_create_compute_state(
>* the shader code to the GPU.
>*/
>   init_scratch_buffer(sctx, program);
> - si_shader_binary_read(sctx->screen, &program->shader, 
> &program->shader.binary);
> + si_shader_binary_read(sctx->screen, &program->shader);
>  
>  #endif
>   program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index 75a29ae..b988f6d 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2686,11 +2686,9 @@ void si_shader_apply_scratch_relocs(struct si_context 
> *sctx,
>   }
>  }
>  
> -int si_shader_binary_read(struct si_screen *sscreen,
> - struct si_shader *shader,
> - const struct radeon_shader_binary *binary)
> +int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> *shader)
>  {
> -
> + const struct radeon_shader_binary *binary = &shader->binary;
>   unsigned i;
>   unsigned code_size;
>   unsigned char *ptr;
> @@ -2750,7 +2748,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
> si_shader *shader,
>   if (r) {
>   return r;
>   }
> - r = si_shader_binary_read(sscreen, shader, &shader->binary);
> + r = si_shader_binary_read(sscreen, shader);
>  
>   FREE(shader->binary.config);
>   FREE(shader->binary.rodata);
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 8d309b4..1e8b52b 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -191,8 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
> si_shader *shader,
>   LLVMTargetMachineRef tm, LLVMModuleRef mod);
>  void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
>  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
> index);
> -int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> *shader,
> - const struct radeon_shader_binary *binary);
> +int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> *shader);
>  void si_shader_apply_scratch_relocs(struct si_context *sctx,
>   struct si_shader *shader,
>   uint64_t scratch_va);
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/6] radeonsi: upload shader rodata after updating scratch relocations

2015-07-16 Thread Tom Stellard

On Thu, Jul 16, 2015 at 06:55:27PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 
> Cc: 10.5 10.6 
> ---
>  src/gallium/drivers/radeonsi/si_shader.c| 49 
> ++---
>  src/gallium/drivers/radeonsi/si_shader.h|  1 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c |  8 +---
>  3 files changed, 29 insertions(+), 29 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index b988f6d..955e780 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2686,16 +2686,41 @@ void si_shader_apply_scratch_relocs(struct si_context 
> *sctx,
>   }
>  }
>  
> +int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
> *shader)
> +{
> + const struct radeon_shader_binary *binary = &shader->binary;
> + unsigned code_size = binary->code_size + binary->rodata_size;
> + unsigned char *ptr;
> +
> + r600_resource_reference(&shader->bo, NULL);
> + shader->bo = si_resource_create_custom(&sscreen->b.b,
> +PIPE_USAGE_IMMUTABLE,
> +code_size);
> + if (!shader->bo)
> + return -ENOMEM;
> +
> + ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
> + PIPE_TRANSFER_READ_WRITE);
> + util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
> + if (binary->rodata_size > 0) {
> + ptr += binary->code_size;
> + util_memcpy_cpu_to_le32(ptr, binary->rodata,
> + binary->rodata_size);
> + }
> +
> + sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> + return 0;
> +}
> +
>  int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> *shader)
>  {
>   const struct radeon_shader_binary *binary = &shader->binary;
>   unsigned i;
> - unsigned code_size;
> - unsigned char *ptr;
>   bool dump  = r600_can_dump_shader(&sscreen->b,
>   shader->selector ? shader->selector->tokens : NULL);
>  
>   si_shader_binary_read_config(sscreen, shader, 0);
> + si_shader_binary_upload(sscreen, shader);
>  
>   if (dump) {
>   if (!binary->disassembled) {
> @@ -2713,26 +2738,6 @@ int si_shader_binary_read(struct si_screen *sscreen, 
> struct si_shader *shader)
>   shader->num_sgprs, shader->num_vgprs, binary->code_size,
>   shader->lds_size, shader->scratch_bytes_per_wave);
>   }
> -
> - /* copy new shader */
> - code_size = binary->code_size + binary->rodata_size;
> - r600_resource_reference(&shader->bo, NULL);
> - shader->bo = si_resource_create_custom(&sscreen->b.b, 
> PIPE_USAGE_IMMUTABLE,
> -code_size);
> - if (shader->bo == NULL) {
> - return -ENOMEM;
> - }
> -
> -
> - ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, 
> PIPE_TRANSFER_READ_WRITE);
> - util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
> - if (binary->rodata_size > 0) {
> - ptr += binary->code_size;
> - util_memcpy_cpu_to_le32(ptr, binary->rodata, 
> binary->rodata_size);
> - }
> -
> - sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
> -
>   return 0;
>  }
>  
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
> b/src/gallium/drivers/radeonsi/si_shader.h
> index 1e8b52b..c12782f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -191,6 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
> si_shader *shader,
>   LLVMTargetMachineRef tm, LLVMModuleRef mod);
>  void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
>  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
> index);
> +int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
> *shader);
>  int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
> *shader);
>  void si_shader_apply_scratch_relocs(struct si_context *sctx,
>   struct si_shader *shader,
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index a842d9d..12bce9a 100644
> --- a/src/gallium/drivers/rad

Re: [Mesa-dev] [PATCH] clover: Fix bug with computing hard_event status

2015-07-16 Thread Tom Stellard

On Sat, Jul 11, 2015 at 02:35:53PM +0300, Francisco Jerez wrote:
> Tom Stellard  writes:
> 
> > pipe_context::flush() can return a NULL fence if the queue is already
> > empty, so we should not assume that an event with a NULL fence
> > has the status of CL_QUEUED.
> >
> 
> This seems suspicious...  On the one hand it doesn't seem to be a
> documented "feature" of pipe_context::flush to return NULL except in
> error conditions (I'm pretty sure other drivers like nouveau won't), and
> it seems like it could easily break assumptions of other state trackers.
> 
> IMO pipe_context::flush() should respect the invariant that whatever is
> returned in the fence output argument (unless some error occurred) be a
> valid argument for pipe_screen::fence_finish() and ::fence_signalled()
> -- I don't think NULL is?
> 
> On the other hand this leaves me wondering how could the queue already
> be empty when clover calls pipe_context::flush() -- I assume by queue
> you mean the pipe driver's?  The fact that clover calls
> pipe_context::flush() implies that clover's event queue is not empty
> (i.e. there have been commands enqueued to the pipe driver since the
> last call to pipe_context::flush()).  It sounds like this mismatch
> between clover's and the pipe driver's command queue might be caused by
> some race condition elsewhere?
> 
> Thanks.
> 

The bug appears in programs which call clFinish() without ever
adding anything to the command queue.  In this case, radeonsi
sees that no commands have been submitted to the GPU, so it doesn't
submit the fence and sets the fence parameter to NULL.

-Tom


> > CC: 10.6 
> > ---
> >  src/gallium/state_trackers/clover/core/event.cpp | 7 ---
> >  src/gallium/state_trackers/clover/core/event.hpp | 1 +
> >  2 files changed, 5 insertions(+), 3 deletions(-)
> >
> > diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
> > b/src/gallium/state_trackers/clover/core/event.cpp
> > index d75b839..b973c78 100644
> > --- a/src/gallium/state_trackers/clover/core/event.cpp
> > +++ b/src/gallium/state_trackers/clover/core/event.cpp
> > @@ -118,7 +118,7 @@ event::wait() const {
> >  hard_event::hard_event(command_queue &q, cl_command_type command,
> > const ref_vector &deps, action action) :
> > event(q.context(), deps, profile(q, action), [](event &ev){}),
> > -   _queue(q), _command(command), _fence(NULL) {
> > +   _queue(q), _command(command), _fence(NULL), _fenced(false) {
> > if (q.profiling_enabled())
> >_time_queued = timestamp::current(q);
> >  
> > @@ -138,7 +138,7 @@ hard_event::status() const {
> > if (event::status() < 0)
> >return event::status();
> >  
> > -   else if (!_fence)
> > +   else if (!_fenced)
> >return CL_QUEUED;
> >  
> > else if (!screen->fence_finish(screen, _fence, 0))
> > @@ -167,7 +167,7 @@ hard_event::wait() const {
> > if (status() == CL_QUEUED)
> >queue()->flush();
> >  
> > -   if (!_fence ||
> > +   if (!_fenced ||
> > !screen->fence_finish(screen, _fence, PIPE_TIMEOUT_INFINITE))
> >throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
> >  }
> > @@ -196,6 +196,7 @@ void
> >  hard_event::fence(pipe_fence_handle *fence) {
> > pipe_screen *screen = queue()->device().pipe;
> > screen->fence_reference(screen, &_fence, fence);
> > +   _fenced = true;
> >  }
> >  
> >  event::action
> > diff --git a/src/gallium/state_trackers/clover/core/event.hpp 
> > b/src/gallium/state_trackers/clover/core/event.hpp
> > index 6469e48..fac62d2 100644
> > --- a/src/gallium/state_trackers/clover/core/event.hpp
> > +++ b/src/gallium/state_trackers/clover/core/event.hpp
> > @@ -137,6 +137,7 @@ namespace clover {
> >const intrusive_ref _queue;
> >cl_command_type _command;
> >pipe_fence_handle *_fence;
> > +  bool _fenced;
> >lazy _time_queued, _time_submit, _time_start, _time_end;
> > };
> >  
> > -- 
> > 2.0.4




> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] gallivm: Don't use raw_debug_ostream for disassembling

2015-07-20 Thread Tom Stellard

All LLVM API calls that require an ostream object have been removed from
the disassemble() function, so we don't need to use this class to wrap
_debug_printf(); we can just call this function directly.
---
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 27 +-
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 405e648..ec88f33 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -123,7 +123,7 @@ lp_debug_dump_value(LLVMValueRef value)
  * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
  */
 static size_t
-disassemble(const void* func, llvm::raw_ostream & Out)
+disassemble(const void* func)
 {
const uint8_t *bytes = (const uint8_t *)func;
 
@@ -141,7 +141,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
char outline[1024];
 
if (!D) {
-  Out << "error: couldn't create disassembler for triple " << Triple << 
"\n";
+  _debug_printf("error: couldn't create disassembler for triple %s\n",
+Triple.c_str());
   return 0;
}
 
@@ -155,13 +156,13 @@ disassemble(const void* func, llvm::raw_ostream & Out)
* so that between runs.
*/
 
-  Out << llvm::format("%6lu:\t", (unsigned long)pc);
+  _debug_printf("%6lu:\t", (unsigned long)pc);
 
   Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, 
outline,
sizeof outline);
 
   if (!Size) {
- Out << "invalid\n";
+ _debug_printf("invalid\n");
  pc += 1;
  break;
   }
@@ -173,10 +174,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
   if (0) {
  unsigned i;
  for (i = 0; i < Size; ++i) {
-Out << llvm::format("%02x ", bytes[pc + i]);
+_debug_printf("%02x ", bytes[pc + i]);
  }
  for (; i < 16; ++i) {
-Out << "   ";
+_debug_printf("   ");
  }
   }
 
@@ -184,9 +185,9 @@ disassemble(const void* func, llvm::raw_ostream & Out)
* Print the instruction.
*/
 
-  Out << outline;
+  _debug_printf("%*s", Size, outline);
 
-  Out << "\n";
+  _debug_printf("\n");
 
   /*
* Stop disassembling on return statements, if there is no record of a
@@ -206,13 +207,12 @@ disassemble(const void* func, llvm::raw_ostream & Out)
   pc += Size;
 
   if (pc >= extent) {
- Out << "disassembly larger than " << extent << "bytes, aborting\n";
+ _debug_printf("disassembly larger than %ull bytes, aborting\n", 
extent);
  break;
   }
}
 
-   Out << "\n";
-   Out.flush();
+   _debug_printf("\n");
 
LLVMDisasmDispose(D);
 
@@ -229,9 +229,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
 
 extern "C" void
 lp_disassemble(LLVMValueRef func, const void *code) {
-   raw_debug_ostream Out;
-   Out << LLVMGetValueName(func) << ":\n";
-   disassemble(code, Out);
+   _debug_printf("%s:\n", LLVMGetValueName(func));
+   disassemble(code);
 }
 
 
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] gallivm: Add ifdefs so raw_debug_ostream is only defined when used

2015-07-20 Thread Tom Stellard

Its only use is to implement a custom version of LLVMDumpValue
on some Windows and embedded platforms.
---
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp 
b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index ec88f33..0a5c2cc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -61,6 +61,7 @@ lp_check_alignment(const void *ptr, unsigned alignment)
return ((uintptr_t)ptr & (alignment - 1)) == 0;
 }
 
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || 
defined(PIPE_OS_EMBEDDED)
 
 class raw_debug_ostream :
public llvm::raw_ostream
@@ -91,6 +92,7 @@ raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
}
 }
 
+#endif
 
 extern "C" const char *
 lp_get_module_id(LLVMModuleRef module)
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] gallivm: Initialize LLVM Module's DataLayout to an empty string.

2015-07-20 Thread Tom Stellard

This fixes crashes in some piglit tests on radeonsi that use the draw
module, and llvmpipe is likely completely broken without this on LLVM
3.8.

This is just a temporary solution.  The correct solution will require
creating a TargetMachine during gallivm initialization and pulling the
DataLayout from there.  This will be a somewhat invasive change, and it
will need to be validated on multiple LLVM versions.
---
 src/gallium/auxiliary/gallivm/lp_bld_init.c | 28 +++-
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c 
b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 384ea86..017d075 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -106,7 +106,6 @@ enum LLVM_CodeGenOpt_Level {
 static boolean
 create_pass_manager(struct gallivm_state *gallivm)
 {
-   char *td_str;
assert(!gallivm->passmgr);
assert(gallivm->target);
 
@@ -122,10 +121,29 @@ create_pass_manager(struct gallivm_state *gallivm)
// Old versions of LLVM get the DataLayout from the pass manager.
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 
-   // New ones from the Module.
-   td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
-   LLVMSetDataLayout(gallivm->module, td_str);
-   free(td_str);
+   /* Setting the module's DataLayout to an empty string will cause the
+* ExecutionEngine to copy to the DataLayout string from its target
+* machine to the module.  As of LLVM 3.8 the module and the execution
+* engine are required to have the same DataLayout.
+*
+* TODO: This is just a temporary work-around.  The correct solution is
+* for gallivm_init_state() to create a TargetMachine and pull the
+* DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+* is being implicitly created by the EngineBuilder in
+* lp_build_create_jit_compiler_for_module()
+*/
+
+#if HAVE_LLVM < 0x0308
+   {
+  char *td_str;
+  // New ones from the Module.
+  td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
+  LLVMSetDataLayout(gallivm->module, td_str);
+  free(td_str);
+   }
+#else
+   LLVMSetDataLayout(gallivm->module, "");
+#endif
 
if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
   /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
-- 
1.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 03/10] radeonsi: initialize output, temp, and address registers to "undef"

2015-10-15 Thread Tom Stellard

On Sun, Oct 11, 2015 at 03:29:43AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 
> This removes "v_mov v0, 0" which typically occurs before exports.
> 

Reviewed-by: Tom Stellard 

> Totals:
> SGPRS: 345216 -> 344552 (-0.19 %)
> VGPRS: 197684 -> 197132 (-0.28 %)
> Code Size: 7390408 -> 7375376 (-0.20 %) bytes
> LDS: 91 -> 91 (0.00 %) blocks
> Scratch: 1842176 -> 1679360 (-8.84 %) bytes per wave
> 
> Totals from affected shaders:
> SGPRS: 101336 -> 100672 (-0.66 %)
> VGPRS: 53920 -> 53368 (-1.02 %)
> Code Size: 2170176 -> 2155144 (-0.69 %) bytes
> LDS: 2 -> 2 (0.00 %) blocks
> Scratch: 1015808 -> 852992 (-16.03 %) bytes per wave
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 19 +++
>  1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 2e9a013..f548d1a 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -272,6 +272,15 @@ static LLVMValueRef fetch_system_value(
>   return bitcast(bld_base, type, cval);
>  }
>  
> +static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
> +   LLVMTypeRef type,
> +   const char *name)
> +{
> + LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
> + LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
> + return ptr;
> +}
> +
>  static void emit_declaration(
>   struct lp_build_tgsi_context * bld_base,
>   const struct tgsi_full_declaration *decl)
> @@ -285,7 +294,7 @@ static void emit_declaration(
>   for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>   unsigned chan;
>   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
> -  ctx->soa.addr[idx][chan] = lp_build_alloca(
> +  ctx->soa.addr[idx][chan] = 
> si_build_alloca_undef(
>   &ctx->gallivm,
>   ctx->soa.bld_base.uint_bld.elem_type, 
> "");
>   }
> @@ -315,8 +324,9 @@ static void emit_declaration(
>   for (idx = first; idx <= last; idx++) {
>   for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
>   ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
> - lp_build_alloca(bld_base->base.gallivm, 
> bld_base->base.vec_type,
> - "temp");
> + 
> si_build_alloca_undef(bld_base->base.gallivm,
> +   
> bld_base->base.vec_type,
> +   "temp");
>   }
>   }
>   break;
> @@ -347,7 +357,8 @@ static void emit_declaration(
>   unsigned chan;
>   assert(idx < RADEON_LLVM_MAX_OUTPUTS);
>   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
> - ctx->soa.outputs[idx][chan] = 
> lp_build_alloca(&ctx->gallivm,
> + ctx->soa.outputs[idx][chan] = 
> si_build_alloca_undef(
> + &ctx->gallivm,
>   ctx->soa.bld_base.base.elem_type, "");
>   }
>   }
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 04/10] radeonsi: don't emit AMDGPU intrinsics for EX2, ROUND, TRUNC

2015-10-15 Thread Tom Stellard

On Sun, Oct 11, 2015 at 03:29:44AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> No difference according to shader-db.
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index f548d1a..91cf658 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1481,7 +1481,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
>   bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
>   bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
> + bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
>   bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
>   bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
> @@ -1530,7 +1530,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
>   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = 
> "llvm.AMDIL.round.nearest.";
> + bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
>   bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = 
> "llvm.AMDGPU.rsq.clamped.f32";
>   bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
> @@ -1546,7 +1546,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
>   bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
>   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
> + bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
>   bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
>   bld_base->op_actions[TGSI_OPCODE_UBFE].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = 
> "llvm.AMDGPU.bfe.u32";
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/10] radeonsi: don't emit AMDGPU intrinsics for integer abs, min, max

2015-10-15 Thread Tom Stellard

On Sun, Oct 11, 2015 at 03:29:45AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> No difference according to shader-db. (with the new S_ABS_I32 pattern)
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 60 
> ++
>  1 file changed, 50 insertions(+), 10 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 91cf658..23ea23a 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1393,6 +1393,51 @@ static void emit_imsb(const struct 
> lp_build_tgsi_action * action,
>   LLVMBuildSelect(builder, cond, all_ones, msb, "");
>  }
>  
> +static void emit_iabs(const struct lp_build_tgsi_action *action,
> +   struct lp_build_tgsi_context *bld_base,
> +   struct lp_build_emit_data *emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +
> + emit_data->output[emit_data->chan] =
> + lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
> +   emit_data->args[0],
> +   LLVMBuildNeg(builder,
> +emit_data->args[0], ""));
> +}
> +
> +static void emit_minmax_int(const struct lp_build_tgsi_action *action,
> + struct lp_build_tgsi_context *bld_base,
> + struct lp_build_emit_data *emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMIntPredicate op;
> +
> + switch (emit_data->info->opcode) {
> + default:
> + assert(0);
> + case TGSI_OPCODE_IMAX:
> + op = LLVMIntSGT;
> + break;
> + case TGSI_OPCODE_IMIN:
> + op = LLVMIntSLT;
> + break;
> + case TGSI_OPCODE_UMAX:
> + op = LLVMIntUGT;
> + break;
> + case TGSI_OPCODE_UMIN:
> + op = LLVMIntULT;
> + break;
> + }
> +
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSelect(builder,
> + LLVMBuildICmp(builder, op, emit_data->args[0],
> +   emit_data->args[1], ""),
> + emit_data->args[0],
> + emit_data->args[1], "");
> +}
> +
>  void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  {
>   struct lp_type type;
> @@ -1493,17 +1538,14 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
>   bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
>   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
> - bld_base->op_actions[TGSI_OPCODE_IABS].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
> + bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
>   bld_base->op_actions[TGSI_OPCODE_IBFE].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = 
> "llvm.AMDGPU.bfe.i32";
>   bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
>   bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
>   bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
> - bld_base->op_actions[TGSI_OPCODE_IMAX].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax";
> - bld_base->op_actions[TGSI_OPCODE_IMIN].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin";
> + bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
> + bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
>   bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
>   bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
>   bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
> @@ -1551,10 +1593,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_UBFE].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = 
> "llvm.AMDGPU.bfe.u32";
>   bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
> - bld_base->op

Re: [Mesa-dev] [PATCH 07/10] radeonsi: don't use the AMDGPU intrinsic for CMP

2015-10-15 Thread Tom Stellard

On Sun, Oct 11, 2015 at 03:29:47AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> The increase in VGPRs is unfortunate, but the decrease in the scratch size
> is always welcome.
> 
> Totals:
> SGPRS: 344552 -> 344368 (-0.05 %)
> VGPRS: 197132 -> 197552 (0.21 %)
> Code Size: 7375376 -> 7366304 (-0.12 %) bytes
> LDS: 91 -> 91 (0.00 %) blocks
> Scratch: 1679360 -> 1615872 (-3.78 %) bytes per wave
> 
> Totals from affected shaders:
> SGPRS: 47736 -> 47552 (-0.39 %)
> VGPRS: 27952 -> 28372 (1.50 %)
> Code Size: 1392724 -> 1383652 (-0.65 %) bytes
> LDS: 39 -> 39 (0.00 %) blocks
> Scratch: 513024 -> 449536 (-12.38 %) bytes per wave
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 31 
> +++---
>  1 file changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index c22ea7c..ac99e73 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -919,7 +919,21 @@ static void emit_ucmp(
>   LLVMBuildSelect(builder, v, emit_data->args[1], 
> emit_data->args[2], "");
>  }
>  
> -static void emit_cmp(
> +static void emit_cmp(const struct lp_build_tgsi_action *action,
> +  struct lp_build_tgsi_context *bld_base,
> +  struct lp_build_emit_data *emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> + LLVMValueRef cond, *args = emit_data->args;
> +
> + cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
> +  bld_base->base.zero, "");
> +
> + emit_data->output[emit_data->chan] =
> + LLVMBuildSelect(builder, cond, args[1], args[2], "");
> +}
> +
> +static void emit_set_cond(
>   const struct lp_build_tgsi_action *action,
>   struct lp_build_tgsi_context * bld_base,
>   struct lp_build_emit_data * emit_data)
> @@ -1503,8 +1517,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
>   bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
> - bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
> + bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
>   bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
>   bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
> @@ -1573,13 +1586,13 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
>   bld_base->op_actions[TGSI_OPCODE_RSQ].intr_name = 
> "llvm.AMDGPU.rsq.clamped.f32";
>   bld_base->op_actions[TGSI_OPCODE_RSQ].emit = build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_cmp;
> - bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_cmp;
> + bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
> + bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
>   bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
> - bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_cmp;
> - bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_cmp;
> - bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_cmp;
> - bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
> + bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
> + bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
> + bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
> + bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
>   bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
>   bld_base->op_actions[TGSI_OPCODE_SQRT].emit = 
> build_tgsi_intrinsic_nomem;
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/10] radeonsi: re-enable unsafe-fp-math for LLVM 3.8

2015-10-15 Thread Tom Stellard

On Sun, Oct 11, 2015 at 03:29:48AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 

I don't think we should globally enable this until we are sure it does not
introduce any illegal transforms.

> Required for 1/sqrt ==> rsq.

I think the arcp fast-math flag for instructions is supposed to allow this.
Let me check with some LLVM people.

-Tom
> 
> We should finally fix the hang instead of running away from the issue. This
> assumes the bug is in LLVM and we have time to fix it before the release.
> Include compute shaders as well, which only affects TGSI and thus OpenGL.
> 
> Totals:
> SGPRS: 344368 -> 345104 (0.21 %)
> VGPRS: 197552 -> 197420 (-0.07 %)
> Code Size: 7366304 -> 7324692 (-0.56 %) bytes
> LDS: 91 -> 91 (0.00 %) blocks
> Scratch: 1615872 -> 1524736 (-5.64 %) bytes per wave
> 
> Totals from affected shaders:
> SGPRS: 146696 -> 147432 (0.50 %)
> VGPRS: 87212 -> 87080 (-0.15 %)
> Code Size: 3852664 -> 3811052 (-1.08 %) bytes
> LDS: 48 -> 48 (0.00 %) blocks
> Scratch: 1179648 -> 1088512 (-7.73 %) bytes per wave
> ---
>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
> b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> index 6b2ebde..4bda4a4 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> @@ -84,6 +84,13 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
>   sprintf(Str, "%1d", llvm_type);
>  
>   LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
> +
> +#if HAVE_LLVM >= 0x0308
> + /* This only affects TGSI (OpenGL), so it's okay to set it for
> +  * compute shaders too.
> +  */
> + LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
> +#endif
>  }
>  
>  static void init_r600_target()
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] llvm TGSI backend (WIP) questions

2015-11-13 Thread Tom Stellard

On Fri, Nov 13, 2015 at 02:46:52PM +0100, Hans de Goede wrote:
> Hi All,
> 
> So as discussed I've started working on a TGSI backend for
> llvm to use as a way to get compute going on nouveau (and other gpu-s).
> 
> I'm still learning all the ins and outs of llvm so I do not have
> much to show yet.
> 
> I've rebased Francisco's (curro's) latest version on top of llvm
> trunk, and added a commit on top to actually get it to build with the
> latest trunk. So currently I'm at the point where I've just
> taken Francisco's code, and made it compile, no more and no less.
> 
> I have a git repo with this work available here:
> 
> http://cgit.freedesktop.org/~jwrdegoede/llvm/
> 
> So the next step would be to test this and see if it actually
> does anything, questions:
> 
> 1) Does anyone have a simple test case / command where I can
> invoke just llvm and get TGSI asm output to check ?
> 

The easiest way to do this is with the llc tool which ships with llvm.
It compiles LLVM IR to target code, which in this case is tgsi.
I would recommend taking one of the simple examples from
test/CodeGen/AMDGPU (you may need to get these from llvm trunk, not sure
what llvm version you are using).

To use llc:

llc -march=tgsi input.ll -o -

This will output TGSI.

If you want to use clang to compile OpenCL C kernels to TGSI you will
need to teach clang about the TGSI target by implementing a
sub-class of TargetInfo in lib/Basic/Targets.cpp.  Look at the
AMDGPU target for examples, but I recommend starting with llc.

> 2) Assuming I get the above to (somewhat) work, is there a
> way to make llvm show the output of the various intermediate
> passes in a human readable form ?
> 

You can pass -print-before-all or -print-after-all to dump the
intermediate forms.

> Regards,
> 
> Hans
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] r600g, radeonsi: Fix calculation of IR target cap string buffer size

2015-01-23 Thread Tom Stellard

Reviewed-by: Tom Stellard 

On Fri, Jan 23, 2015 at 10:01:56PM +0100, Marek Olšák wrote:
> Reviewed-by: Marek Olšák 
> 
> Marek
> 
> On Thu, Jan 22, 2015 at 4:41 AM, Michel Dänzer  wrote:
> > From: Michel Dänzer 
> >
> > Fixes writing beyond the allocated buffer:
> >
> > ==31855== Invalid write of size 1
> > ==31855==at 0x50AB2A9: vsprintf (iovsprintf.c:43)
> > ==31855==by 0x508F6F6: sprintf (sprintf.c:32)
> > ==31855==by 0xB59C7EC: r600_get_compute_param (r600_pipe_common.c:526)
> > ==31855==by 0x5B2B7DE: get_compute_param (device.cpp:37)
> > ==31855==by 0x5B2B7DE: clover::device::ir_target() const 
> > (device.cpp:201)
> > ==31855==by 0x5B398E0: 
> > clover::program::build(clover::ref_vector const&, char 
> > const*, clover::compat::vector > clover::compat::string> > const&) (program.cpp:63)
> > ==31855==by 0x5B20152: clBuildProgram (program.cpp:182)
> > ==31855==by 0x400F41: main (hello_world.c:109)
> > ==31855==  Address 0x56fed5f is 0 bytes after a block of size 15 alloc'd
> > ==31855==at 0x4C29180: operator new(unsigned long) 
> > (vg_replace_malloc.c:324)
> > ==31855==by 0x5B2B7C2: allocate (new_allocator.h:104)
> > ==31855==by 0x5B2B7C2: allocate (alloc_traits.h:357)
> > ==31855==by 0x5B2B7C2: _M_allocate (stl_vector.h:170)
> > ==31855==by 0x5B2B7C2: _M_create_storage (stl_vector.h:185)
> > ==31855==by 0x5B2B7C2: _Vector_base (stl_vector.h:136)
> > ==31855==by 0x5B2B7C2: vector (stl_vector.h:278)
> > ==31855==by 0x5B2B7C2: get_compute_param (device.cpp:35)
> > ==31855==by 0x5B2B7C2: clover::device::ir_target() const 
> > (device.cpp:201)
> > ==31855==by 0x5B398E0: 
> > clover::program::build(clover::ref_vector const&, char 
> > const*, clover::compat::vector > clover::compat::string> > const&) (program.cpp:63)
> > ==31855==by 0x5B20152: clBuildProgram (program.cpp:182)
> > ==31855==by 0x400F41: main (hello_world.c:109)
> >
> > Signed-off-by: Michel Dänzer 
> > ---
> >  src/gallium/drivers/radeon/r600_pipe_common.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
> > b/src/gallium/drivers/radeon/r600_pipe_common.c
> > index f91772e..ddb4142 100644
> > --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> > +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> > @@ -524,9 +524,9 @@ static int r600_get_compute_param(struct pipe_screen 
> > *screen,
> > }
> > if (ret) {
> > sprintf(ret, "%s-%s", gpu, triple);
> > -
> > }
> > -   return (strlen(triple) + strlen(gpu)) * sizeof(char);
> > +   /* +2 for dash and terminating NIL byte */
> > +   return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
> > }
> > case PIPE_COMPUTE_CAP_GRID_DIMENSION:
> > if (ret) {
> > --
> > 2.1.4
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: Enable VGPR spilling for all shader types v3

2015-01-23 Thread Tom Stellard

On Thu, Jan 22, 2015 at 11:27:32AM +0900, Michel Dänzer wrote:
> On 21.01.2015 21:12, Marek Olšák wrote:
> > We also had a case when the CPU accidentally corrupted shaders,
> > because the shaders were mapped after textures and a CPU texture
> > upload overflowed and overwrote shaders. I suppose we should have
> > unmapped the shaders.
> 
> Sounds like a good idea.
> 
> 
> Tom, for now I suggest this solution, summarized from Marek's previous
> descriptions:
> 
> (At least) for shaders which have relocations, keep a copy of the
> machine code in malloced memory. When the relocated values change,
> update them in the malloced memory, allocate a new BO, map it, copy the
> machine code from the malloced memory to the BO, replace any existing
> shader BO with the new one and invalidate the shader state.
> 

Hi,

Attached is a WIP patch attempting to implement it this way.
Unfortunately, I was unable to get it working, so I wanted to
submit it for review in case someone can spot what I'm doing wrong.

You can find the broken code wrapped in #if 0 in the
si_update_scratch_buffer() function in si_state_shaders.c

Based on the dmesg output and other tests I've done, it appears
that the GPU is still executing the shader code from the old bo
which does not contain the relocations.

The code in the #else branch works fine, but it updates the existing
bo in place rather than creating a new one.

Any idea what I've done wrong?

Thanks,
Tom
From ba673155672756fb0bf9873b2ae76c3f5ccd02e2 Mon Sep 17 00:00:00 2001
From: Tom Stellard 
Date: Wed, 10 Dec 2014 09:13:59 -0500
Subject: [PATCH] radeonsi: Enable VGPR spilling for all shader types v5 (WIP)

v2:
  - Only emit write SPI_TMPRING_SIZE once per packet.
  - Use context global scratch buffer.

v3:
  - Patch shaders using WRITE_DATA packet instead of map/unmap.
  - Emit ICACHE_FLUSH, CS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH, and
VS_PARTIAL_FLUSH when patching shaders.

v4:
  - Code cleanups.
  - Remove unnecessary multiplies.

v5:
  - Patch shaders in system memory and re-upload to vram.
---
 src/gallium/drivers/radeonsi/si_compute.c   |  42 +--
 src/gallium/drivers/radeonsi/si_hw_context.c|   1 +
 src/gallium/drivers/radeonsi/si_pipe.c  |   9 +-
 src/gallium/drivers/radeonsi/si_pipe.h  |   6 +
 src/gallium/drivers/radeonsi/si_shader.c|  54 +++--
 src/gallium/drivers/radeonsi/si_shader.h|   8 +-
 src/gallium/drivers/radeonsi/si_state_draw.c|  15 +++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 141 +++-
 8 files changed, 227 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 981bccb..4dd4379 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -42,12 +42,6 @@
 #define NUM_USER_SGPRS 4
 #endif
 
-static const char *scratch_rsrc_dword0_symbol =
-	"SCRATCH_RSRC_DWORD0";
-
-static const char *scratch_rsrc_dword1_symbol =
-	"SCRATCH_RSRC_DWORD1";
-
 struct si_compute {
 	struct si_context *ctx;
 
@@ -68,8 +62,6 @@ struct si_compute {
 #endif
 };
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-			struct si_shader *shader, uint64_t scratch_va);
 static void init_scratch_buffer(struct si_context *sctx, struct si_compute *program)
 {
 	unsigned scratch_bytes = 0;
@@ -85,8 +77,8 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
 program->shader.binary.global_symbol_offsets[i];
 		unsigned scratch_bytes_needed;
 
-		si_shader_binary_read_config(&program->shader.binary,
-		&program->shader, offset);
+		si_shader_binary_read_config(sctx->screen, &program->shader.binary,
+&program->shader, offset);
 		scratch_bytes_needed = scratch_waves *
 program->shader.scratch_bytes_per_wave;
 		scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
@@ -101,7 +93,8 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
 	scratch_buffer_va = program->scratch_bo->gpu_address;
 
 	/* Patch the shader with the scratch buffer address. */
-	apply_scratch_relocs(sctx->screen, &program->shader, scratch_buffer_va);
+	si_shader_apply_scratch_relocs(sctx,
+&program->shader, scratch_buffer_va);
 
 }
 
@@ -226,30 +219,6 @@ static unsigned compute_num_waves_for_scratch(
 	return scratch_waves;
 }
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-			struct si_shader *shader, uint64_t scratch_va) {
-	unsigned i;
-	uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
-	uint32_t scratch_rsrc_dword1 =
-		S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-		|  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
-
-	if (!shader->binary.reloc_count) {
-		return;
-	}
-
-	for (i = 0 ; i < shader->b

Re: [Mesa-dev] [PATCH 1/1] clover: Fix build with llvm after r226981

2015-01-26 Thread Tom Stellard

On Sun, Jan 25, 2015 at 04:11:40PM -0500, Jan Vesely wrote:
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=88783
> Signed-off-by: Jan Vesely 

Pushed, thanks!

> ---
>  src/gallium/state_trackers/clover/llvm/invocation.cpp | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 6cc07b2..7a0be53 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -331,7 +331,11 @@ namespace {
>  
>llvm::PassManagerBuilder PMB;
>PMB.OptLevel = optimization_level;
> +#if HAVE_LLVM < 0x0307
>PMB.LibraryInfo = new llvm::TargetLibraryInfo(
> +#else
> +  PMB.LibraryInfo = new llvm::TargetLibraryInfoImpl(
> +#endif
>  llvm::Triple(mod->getTargetTriple()));
>PMB.populateModulePassManager(PM);
>PM.run(*mod);
> -- 
> 2.1.0
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] radeonsi: Avoid leaking memory when rebuilding shader states

2015-01-27 Thread Tom Stellard

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_pm4.c   | 12 
 src/gallium/drivers/radeonsi/si_pm4.h   |  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  4 
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pm4.c 
b/src/gallium/drivers/radeonsi/si_pm4.c
index 5edf152..e9b9a5f 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -103,6 +103,13 @@ void si_pm4_add_bo(struct si_pm4_state *state,
state->bo_priority[idx] = priority;
 }
 
+void si_pm4_free_state_simple(struct si_pm4_state *state)
+{
+   for (int i = 0; i < state->nbo; ++i)
+   r600_resource_reference(&state->bo[i], NULL);
+   FREE(state);
+}
+
 void si_pm4_free_state(struct si_context *sctx,
   struct si_pm4_state *state,
   unsigned idx)
@@ -114,10 +121,7 @@ void si_pm4_free_state(struct si_context *sctx,
sctx->emitted.array[idx] = NULL;
}
 
-   for (int i = 0; i < state->nbo; ++i) {
-   r600_resource_reference(&state->bo[i], NULL);
-   }
-   FREE(state);
+   si_pm4_free_state_simple(state);
 }
 
 unsigned si_pm4_dirty_dw(struct si_context *sctx)
diff --git a/src/gallium/drivers/radeonsi/si_pm4.h 
b/src/gallium/drivers/radeonsi/si_pm4.h
index 8680a9e..bfb5562 100644
--- a/src/gallium/drivers/radeonsi/si_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_pm4.h
@@ -71,6 +71,7 @@ void si_pm4_add_bo(struct si_pm4_state *state,
   enum radeon_bo_usage usage,
   enum radeon_bo_priority priority);
 
+void si_pm4_free_state_simple(struct si_pm4_state *state);
 void si_pm4_free_state(struct si_context *sctx,
   struct si_pm4_state *state,
   unsigned idx);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 887680f..3249bcc 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -316,6 +316,10 @@ static void si_shader_ps(struct si_shader *shader)
 
 static void si_shader_init_pm4_state(struct si_shader *shader)
 {
+
+   if (shader->pm4)
+   si_pm4_free_state_simple(shader->pm4);
+
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.vs.as_es)
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/5] radeonsi: Add radeon_shader_binary member to struct si_shader

2015-01-27 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_compute.c | 11 +--
 src/gallium/drivers/radeonsi/si_shader.h  |  1 +
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index ba63afd..840d21f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -54,7 +54,6 @@ struct si_compute {
unsigned local_size;
unsigned private_size;
unsigned input_size;
-   struct radeon_shader_binary binary;
struct si_shader shader;
unsigned num_user_sgprs;
 
@@ -102,8 +101,8 @@ static void *si_create_compute_state(
}
 #else
 
-   radeon_elf_read(code, header->num_bytes, &program->binary, true);
-   si_shader_binary_read(sctx->screen, &program->shader, &program->binary);
+   radeon_elf_read(code, header->num_bytes, &program->shader.binary, true);
+   si_shader_binary_read(sctx->screen, &program->shader, 
&program->shader.binary);
 
 #endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
@@ -256,7 +255,7 @@ static void si_launch_grid(
 
 #if HAVE_LLVM >= 0x0306
/* Read the config information */
-   si_shader_binary_read_config(&program->binary, shader, pc);
+   si_shader_binary_read_config(&program->shader.binary, shader, pc);
 #endif
 
/* Upload the kernel arguments */
@@ -296,7 +295,7 @@ static void si_launch_grid(
 
/* Patch the shader with the scratch buffer address. */
apply_scratch_relocs(sctx->screen,
-   &program->binary, shader, scratch_buffer_va);
+   &program->shader.binary, shader, scratch_buffer_va);
 
}
 
@@ -481,7 +480,7 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
pipe_resource_reference(
(struct pipe_resource **)&program->input_buffer, NULL);
 
-   radeon_shader_binary_free_members(&program->binary, true);
+   radeon_shader_binary_free_members(&program->shader.binary, true);
FREE(program);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 08e344a..6def5c7 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -142,6 +142,7 @@ struct si_shader {
struct si_pm4_state *pm4;
struct r600_resource*bo;
struct r600_resource*scratch_bo;
+   struct radeon_shader_binary binary;
unsignednum_sgprs;
unsignednum_vgprs;
unsignedlds_size;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 4/5] radeonsi/compute: Allocate the scratch buffer during state creation

2015-01-27 Thread Tom Stellard

This moves scratch buffer allocation from si_launch_grid() to
si_create_compute_state().  This helps to reduce the overhead of
launching a kernel and also fixes a bug in the code that would cause
the scratch buffer to be too small if a kernel with smaller scratch size
was launched before a kernel with a larger scratch size.
---
 src/gallium/drivers/radeonsi/si_compute.c | 82 ++-
 src/gallium/drivers/radeonsi/si_shader.c  |  4 +-
 2 files changed, 62 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 840d21f..d0966af 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -67,6 +67,49 @@ struct si_compute {
 #endif
 };
 
+static void apply_scratch_relocs(const struct si_screen *sscreen,
+   struct si_shader *shader, uint64_t scratch_va);
+static void init_scratch_buffer(struct si_context *sctx, struct si_compute 
*program)
+{
+   unsigned scratch_bytes = 0;
+   uint64_t scratch_buffer_va;
+   unsigned i;
+
+   /* Compute the scratch buffer size using the maximum number of waves.
+* This way we don't need to recompute it for each kernel launch. */
+   unsigned scratch_waves = 32 * sctx->screen->b.info.max_compute_units;
+   for (i = 0; i < program->shader.binary.global_symbol_count; i++) {
+   unsigned offset =
+   program->shader.binary.global_symbol_offsets[i];
+   unsigned scratch_bytes_needed;
+
+   si_shader_binary_read_config(&program->shader.binary,
+   &program->shader, offset);
+   scratch_bytes_needed = program->shader.scratch_bytes_per_wave;
+   scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
+   }
+
+   if (scratch_bytes == 0)
+   return;
+
+   program->shader.scratch_bo = (struct r600_resource*)
+   si_resource_create_custom(sctx->b.b.screen,
+   PIPE_USAGE_DEFAULT,
+   scratch_bytes * scratch_waves);
+
+   scratch_buffer_va = program->shader.scratch_bo->gpu_address;
+
+   /* apply_scratch_relocs needs scratch_bytes_per_wave to be set
+* to the maximum bytes needed, so it can compute the stride
+* correctly.
+*/
+   program->shader.scratch_bytes_per_wave = scratch_bytes;
+
+   /* Patch the shader with the scratch buffer address. */
+   apply_scratch_relocs(sctx->screen, &program->shader, scratch_buffer_va);
+
+}
+
 static void *si_create_compute_state(
struct pipe_context *ctx,
const struct pipe_compute_state *cso)
@@ -102,6 +145,12 @@ static void *si_create_compute_state(
 #else
 
radeon_elf_read(code, header->num_bytes, &program->shader.binary, true);
+
+   /* init_scratch_buffer patches the shader code with the scratch address,
+* so we need to call it before si_shader_binary_read() which uploads
+* the shader code to the GPU.
+*/
+   init_scratch_buffer(sctx, program);
si_shader_binary_read(sctx->screen, &program->shader, 
&program->shader.binary);
 
 #endif
@@ -183,32 +232,27 @@ static unsigned compute_num_waves_for_scratch(
 }
 
 static void apply_scratch_relocs(const struct si_screen *sscreen,
-   const struct radeon_shader_binary *binary,
struct si_shader *shader, uint64_t scratch_va) {
unsigned i;
-   char *ptr;
uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
|  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
 
-   if (!binary->reloc_count) {
+   if (!shader->binary.reloc_count) {
return;
}
 
-   ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
-   PIPE_TRANSFER_READ_WRITE);
-   for (i = 0 ; i < binary->reloc_count; i++) {
-   const struct radeon_shader_reloc *reloc = &binary->relocs[i];
+   for (i = 0 ; i < shader->binary.reloc_count; i++) {
+   const struct radeon_shader_reloc *reloc = 
&shader->binary.relocs[i];
if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
-   util_memcpy_cpu_to_le32(ptr + reloc->offset,
+   util_memcpy_cpu_to_le32(shader->binary.code + 
reloc->offset,
&scratch_rsrc_dword0, 4);
} else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-   util_memcpy_cpu_to_le32(ptr + reloc->offset,
+   util_memcpy_cpu_to_le32(shader->binary.code + 
reloc->offset,
&scratch_rsrc_dword1, 4);
}
}
-   sscree

[Mesa-dev] [PATCH 5/5] radeonsi: Enable VGPR spilling for all shader types v5

2015-01-27 Thread Tom Stellard

v2:
  - Only emit write SPI_TMPRING_SIZE once per packet.
  - Use context global scratch buffer.

v3:
  - Patch shaders using WRITE_DATA packet instead of map/unmap.
  - Emit ICACHE_FLUSH, CS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH, and
VS_PARTIAL_FLUSH when patching shaders.

v4:
  - Code cleanups.
  - Remove unnecessary multiplies.

v5:
  - Patch shaders in system memory and re-upload to vram.
---
 src/gallium/drivers/radeonsi/si_compute.c   |  44 ++--
 src/gallium/drivers/radeonsi/si_hw_context.c|   1 +
 src/gallium/drivers/radeonsi/si_pipe.c  |   9 +-
 src/gallium/drivers/radeonsi/si_pipe.h  |   6 ++
 src/gallium/drivers/radeonsi/si_shader.c|  59 --
 src/gallium/drivers/radeonsi/si_shader.h|   6 +-
 src/gallium/drivers/radeonsi/si_state_draw.c|  15 +++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 137 +++-
 8 files changed, 225 insertions(+), 52 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index d0966af..b525683 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -42,12 +42,6 @@
 #define NUM_USER_SGPRS 4
 #endif
 
-static const char *scratch_rsrc_dword0_symbol =
-   "SCRATCH_RSRC_DWORD0";
-
-static const char *scratch_rsrc_dword1_symbol =
-   "SCRATCH_RSRC_DWORD1";
-
 struct si_compute {
struct si_context *ctx;
 
@@ -67,8 +61,6 @@ struct si_compute {
 #endif
 };
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-   struct si_shader *shader, uint64_t scratch_va);
 static void init_scratch_buffer(struct si_context *sctx, struct si_compute 
*program)
 {
unsigned scratch_bytes = 0;
@@ -83,7 +75,7 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
program->shader.binary.global_symbol_offsets[i];
unsigned scratch_bytes_needed;
 
-   si_shader_binary_read_config(&program->shader.binary,
+   si_shader_binary_read_config(sctx->screen,
&program->shader, offset);
scratch_bytes_needed = program->shader.scratch_bytes_per_wave;
scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
@@ -106,8 +98,8 @@ static void init_scratch_buffer(struct si_context *sctx, 
struct si_compute *prog
program->shader.scratch_bytes_per_wave = scratch_bytes;
 
/* Patch the shader with the scratch buffer address. */
-   apply_scratch_relocs(sctx->screen, &program->shader, scratch_buffer_va);
-
+   si_shader_apply_scratch_relocs(sctx,
+   &program->shader, scratch_buffer_va);
 }
 
 static void *si_create_compute_state(
@@ -231,30 +223,6 @@ static unsigned compute_num_waves_for_scratch(
return scratch_waves;
 }
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-   struct si_shader *shader, uint64_t scratch_va) {
-   unsigned i;
-   uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
-   uint32_t scratch_rsrc_dword1 =
-   S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-   |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
-
-   if (!shader->binary.reloc_count) {
-   return;
-   }
-
-   for (i = 0 ; i < shader->binary.reloc_count; i++) {
-   const struct radeon_shader_reloc *reloc = 
&shader->binary.relocs[i];
-   if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
-   util_memcpy_cpu_to_le32(shader->binary.code + 
reloc->offset,
-   &scratch_rsrc_dword0, 4);
-   } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-   util_memcpy_cpu_to_le32(shader->binary.code + 
reloc->offset,
-   &scratch_rsrc_dword1, 4);
-   }
-   }
-}
-
 static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -299,7 +267,7 @@ static void si_launch_grid(
 
 #if HAVE_LLVM >= 0x0306
/* Read the config information */
-   si_shader_binary_read_config(&program->shader.binary, shader, pc);
+   si_shader_binary_read_config(sctx->screen, shader, pc);
 #endif
 
/* Upload the kernel arguments */
@@ -510,13 +478,15 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
LLVMContextDispose(program->llvm_ctx);
}
 #else
+   FREE(program->shader.binary.config);
+   FREE(program->shader.binary.rodata);
+   FREE(program->shader.binary.global_symbol_offsets);
si_shader_destroy(ctx, &program->shader);
 #endif
 
pipe_resource_reference(
(struct pipe_resource **)&program->input_buffer, NULL);
 
-   radeon_shader_binary_free_mem

[Mesa-dev] [PATCH 2/5] radeonsi/compute: Rename si_compute::program to si_compute::shader

2015-01-27 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_compute.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 4427d3b..ba63afd 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -55,7 +55,7 @@ struct si_compute {
unsigned private_size;
unsigned input_size;
struct radeon_shader_binary binary;
-   struct si_shader program;
+   struct si_shader shader;
unsigned num_user_sgprs;
 
struct r600_resource *input_buffer;
@@ -103,7 +103,7 @@ static void *si_create_compute_state(
 #else
 
radeon_elf_read(code, header->num_bytes, &program->binary, true);
-   si_shader_binary_read(sctx->screen, &program->program, 
&program->binary);
+   si_shader_binary_read(sctx->screen, &program->shader, &program->binary);
 
 #endif
program->input_buffer = si_resource_create_custom(sctx->b.b.screen,
@@ -231,7 +231,7 @@ static void si_launch_grid(
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
-   struct si_shader *shader = &program->program;
+   struct si_shader *shader = &program->shader;
unsigned lds_blocks;
unsigned num_waves_for_scratch;
 
@@ -256,7 +256,7 @@ static void si_launch_grid(
 
 #if HAVE_LLVM >= 0x0306
/* Read the config information */
-   si_shader_binary_read_config(&program->binary, &program->program, pc);
+   si_shader_binary_read_config(&program->binary, shader, pc);
 #endif
 
/* Upload the kernel arguments */
@@ -475,7 +475,7 @@ static void si_delete_compute_state(struct pipe_context 
*ctx, void* state){
LLVMContextDispose(program->llvm_ctx);
}
 #else
-   si_shader_destroy(ctx, &program->program);
+   si_shader_destroy(ctx, &program->shader);
 #endif
 
pipe_resource_reference(
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] util: Predicate the fpclassify fallback on !defined(__cplusplus)

2015-01-28 Thread Tom Stellard

On Wed, Jan 28, 2015 at 10:44:57AM -0800, Jason Ekstrand wrote:
> The problem is that the fallbacks we have at the moment don't work in C++.
> While we could theoretically fix the fallbacks it would also raise the
> issue of correctly detecting the fpclassify function.  So, for now, we'll
> just disable it until we actually have a C++ user.
> 
Tested-by: Tom Stellard 
> Reported-by: Tom Stellard 

Should use my AMD email here too ^ or you can drop this line altogether if
you want.

-Tom

> ---
>  src/util/macros.h | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/src/util/macros.h b/src/util/macros.h
> index 180f2f6..74bd8bf 100644
> --- a/src/util/macros.h
> +++ b/src/util/macros.h
> @@ -158,7 +158,15 @@ do {   \
>  #   endif
>  #endif
>  
> -#if defined(fpclassify)
> +/* The fallbacks below don't work correctly in C++ and properly detecting
> + * FP_NORMAL in C++ is hard.  Since we don't use fpclassify in any C++ code
> + * at the moment, we can just predicate this whole thing by not being in
> + * C++ and we should be ok.  If we ever want to use fpclassify in a C++
> + * file, we will have to revisit this.
> + */
> +#ifndef __cplusplus
> +
> +#ifdef FP_NORMAL
>  /* ISO C99 says that fpclassify is a macro.  Assume that any implementation
>   * of fpclassify, whether it's in a C99 compiler or not, will be a macro.
>   */
> @@ -199,7 +207,7 @@ fpclassify(double x)
>  
>  #else
>  
> -enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL}
> +static inline enum {FP_NAN, FP_INFINITE, FP_ZERO, FP_SUBNORMAL, FP_NORMAL}
>  fpclassify(double x)
>  {
> /* XXX do something better someday */
> @@ -208,4 +216,6 @@ fpclassify(double x)
>  
>  #endif
>  
> +#endif /* __cplusplus */
> +
>  #endif /* UTIL_MACROS_H */
> -- 
> 2.2.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] i965: Optimize multiplication by -1 into a negated MOV.

2015-02-12 Thread Tom Stellard

On Thu, Feb 12, 2015 at 03:36:37PM -0800, Matt Turner wrote:
> instructions in affected programs: 968 -> 942 (-2.69%)
> helped:4
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp   | 9 +
>  src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 +
>  2 files changed, 14 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index e74a22e..988e36a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -2335,6 +2335,15 @@ fs_visitor::opt_algebraic()
>   break;
>}
>  
> +  /* a * -1.0 = -a */
> +  if (inst->src[1].is_negative_one()) {
> + inst->opcode = BRW_OPCODE_MOV;
> +inst->src[0].negate = !inst->src[0].negate;

The indentation looks wrong here.

> + inst->src[1] = reg_undef;
> + progress = true;
> + break;
> +  }
> +
>   /* a * 0.0 = 0.0 */
>   if (inst->src[1].is_zero()) {
>  inst->opcode = BRW_OPCODE_MOV;
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
> b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index ee74052..7868465 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -725,6 +725,11 @@ vec4_visitor::opt_algebraic()
>   inst->opcode = BRW_OPCODE_MOV;
>   inst->src[1] = src_reg();
>   progress = true;
> +  } else if (inst->src[1].is_negative_one()) {
> + inst->opcode = BRW_OPCODE_MOV;
> +inst->src[0].negate = !inst->src[0].negate;

Here too.

Also, is this transformation valid when a is INF or NAN?

-Tom

> + inst->src[1] = src_reg();
> + progress = true;
>}
>break;
>case BRW_OPCODE_CMP:
> -- 
> 2.0.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3] clover: Use Legacy PassManager for LLVM trunk (3.7)

2015-02-13 Thread Tom Stellard

On Fri, Feb 13, 2015 at 09:16:17PM -0500, Shawn Starr wrote:
> v3: Fix my typo, 3rd time's a charm, right?
> 

Pushed, thanks!

-Tom

> From 5712d95cb12d8e82d34fafe66bf827ee6726220e Mon Sep 17 00:00:00 2001
> From: Shawn Starr 
> Date: Fri, 13 Feb 2015 20:30:01 -0500
> Subject: [PATCH] clover: Use Legacy PassManager for LLVM trunk (3.7)
> 
> Signed-off-by: Shawn Starr 
> ---
>  src/gallium/state_trackers/clover/llvm/invocation.cpp | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
> b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 0794e61..3c2ca49 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -44,7 +44,11 @@
>  #if HAVE_LLVM < 0x0305
>  #include 
>  #endif
> +#if HAVE_LLVM >= 0x0307
> +#include 
> +#else
>  #include 
> +#endif
>  #include 
>  #include 
>  #include 
> @@ -298,7 +302,12 @@ namespace {
> optimize(llvm::Module *mod, unsigned optimization_level,
>  const std::vector &kernels) {
>  
> -  llvm::PassManager PM;
> +#if HAVE_LLVM >= 0x0307
> +  llvm::legacy::PassManager PM;
> +#else
> +  llvm::PassManager PM;
> +#endif
> +
>// Add a function internalizer pass.
>//
>// By default, the function internalizer pass will look for a function
> -- 
> 2.1.0
> 
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Revert "radeon/llvm: enable unsafe math for graphics shaders"

2015-02-17 Thread Tom Stellard

On Tue, Feb 17, 2015 at 05:15:42PM +0900, Michel Dänzer wrote:
> From: Michel Dänzer 
> 

Reviewed-by: Tom Stellard 

> This reverts commit 0e9cdedd2e3943bdb7f3543a3508b883b167e427.
> 
> It caused the grass to disappear in The Talos Principle.
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89069
> Signed-off-by: Michel Dänzer 
> ---
>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
> b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> index 0f9dbab..624077c 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> @@ -80,10 +80,6 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
>   sprintf(Str, "%1d", llvm_type);
>  
>   LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
> -
> - if (type != TGSI_PROCESSOR_COMPUTE) {
> - LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
> - }
>  }
>  
>  static void init_r600_target()
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCHv2] ra: Disable round-robin strategy for optimistically colorable nodes.

2015-02-17 Thread Tom Stellard

On Tue, Feb 17, 2015 at 03:23:05PM +0200, Francisco Jerez wrote:
> The round-robin allocation strategy is expected to decrease the amount
> of false dependencies created by the register allocator and give the
> post-RA scheduling pass more freedom to move instructions around.  On
> the other hand it has the disadvantage of increasing fragmentation and
> decreasing the number of equally-colored nearby nodes, which increases
> the likelihood of failure in presence of optimistically colorable
> nodes.
> 
> This patch disables the round-robin strategy for optimistically
> colorable nodes.  These typically arise in situations of high register
> pressure or for registers with large live intervals, in both cases the
> task of the instruction scheduler shouldn't be constrained excessively
> by the dense packing of those nodes, and a spill (or on Intel hardware
> a fall-back to SIMD8 mode) is invariably worse than a slightly less
> optimal scheduling.
> 

I'm trying to figure out how this will affect r300g, and it seems like
from your description that it will be an improvement, because r300g
doesn't have a post-ra scheduler and it also can't spill registers.

What do you think?

-Tom


> Shader-db results on the i965 driver:
> 
> total instructions in shared programs: 5488539 -> 5488489 (-0.00%)
> instructions in affected programs: 1121 -> 1071 (-4.46%)
> helped:1
> HURT:  0
> GAINED:49
> LOST:  5
> 
> v2: Re-enable round-robin already for the lowest one of the nodes
> pushed optimistically onto the stack (Connor).
> ---
>  src/util/register_allocate.c | 23 ++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
> index af7a20c..b1ed273 100644
> --- a/src/util/register_allocate.c
> +++ b/src/util/register_allocate.c
> @@ -168,6 +168,12 @@ struct ra_graph {
>  
> unsigned int *stack;
> unsigned int stack_count;
> +
> +   /**
> +* Tracks the start of the set of optimistically-colored registers in the
> +* stack.
> +*/
> +   unsigned int stack_optimistic_start;
>  };
>  
>  /**
> @@ -454,6 +460,7 @@ static void
>  ra_simplify(struct ra_graph *g)
>  {
> bool progress = true;
> +   unsigned int stack_optimistic_start = ~0;
> int i;
>  
> while (progress) {
> @@ -483,12 +490,16 @@ ra_simplify(struct ra_graph *g)
>  
>if (!progress && best_optimistic_node != ~0U) {
>decrement_q(g, best_optimistic_node);
> + stack_optimistic_start =
> +MIN2(stack_optimistic_start, g->stack_count);
>g->stack[g->stack_count] = best_optimistic_node;
>g->stack_count++;
>g->nodes[best_optimistic_node].in_stack = true;
>progress = true;
>}
> }
> +
> +   g->stack_optimistic_start = stack_optimistic_start;
>  }
>  
>  /**
> @@ -542,7 +553,17 @@ ra_select(struct ra_graph *g)
>g->nodes[n].reg = r;
>g->stack_count--;
>  
> -  if (g->regs->round_robin)
> +  /* Rotate the starting point except for any nodes above the lowest
> +   * optimistically colorable node.  The likelihood that we will succeed
> +   * at allocating optimistically colorable nodes is highly dependent on
> +   * the way that the previous nodes popped off the stack are laid out.
> +   * The round-robin strategy increases the fragmentation of the register
> +   * file and decreases the number of nearby nodes assigned to the same
> +   * color, which increases the likelihood of spilling with respect to the
> +   * dense packing strategy.
> +   */
> +  if (g->regs->round_robin &&
> +  g->stack_count <= g->stack_optimistic_start + 1)
>   start_search_reg = r + 1;
> }
>  
> -- 
> 2.1.3
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCHv2] ra: Disable round-robin strategy for optimistically colorable nodes.

2015-02-17 Thread Tom Stellard

On Tue, Feb 17, 2015 at 04:41:41PM +0200, Francisco Jerez wrote:
> Tom Stellard  writes:
> 
> > On Tue, Feb 17, 2015 at 03:23:05PM +0200, Francisco Jerez wrote:
> >> The round-robin allocation strategy is expected to decrease the amount
> >> of false dependencies created by the register allocator and give the
> >> post-RA scheduling pass more freedom to move instructions around.  On
> >> the other hand it has the disadvantage of increasing fragmentation and
> >> decreasing the number of equally-colored nearby nodes, which increases
> >> the likelihood of failure in presence of optimistically colorable
> >> nodes.
> >> 
> >> This patch disables the round-robin strategy for optimistically
> >> colorable nodes.  These typically arise in situations of high register
> >> pressure or for registers with large live intervals, in both cases the
> >> task of the instruction scheduler shouldn't be constrained excessively
> >> by the dense packing of those nodes, and a spill (or on Intel hardware
> >> a fall-back to SIMD8 mode) is invariably worse than a slightly less
> >> optimal scheduling.
> >> 
> >
> Hi Tom,
> 
> > I'm trying to figure out how this will affect r300g, and it seems like
> > from your description that it will be an improvement, because r300g
> > doesn't have a post-ra scheduler and it also can't spill registers.
> >
> > What do you think?
> >
> 
> It looks like it won't, apparently i965 is the only caller of
> ra_set_allocate_round_robin() in the tree right now, so it should be the
> only affected back-end.  You could consider enabling it to reduce the
> number of false dependencies introduced by the register allocator -- after
> this patch it shouldn't lead to increased likelihood of register
> allocation failure anymore.  It might however lead to increased register
> usage possibly limiting the number of threads your hardware can run in
> parallel, the answer really depends on whether that's a limiting factor
> for your hardware or not.  I guess that if you don't have a post-RA
> scheduling pass the benefit you could possibly get from it is rather
> limited, it's probably safe to assume that you don't need it but it
> might be worth looking into.
> 

Ok, thanks for the explanation.  I probably won't have time to
investigate, but it's good knowing this patch is a no-op for
r300g so I don't need to worry about regressions.

-Tom

> > -Tom
> >
> >
> >> Shader-db results on the i965 driver:
> >> 
> >> total instructions in shared programs: 5488539 -> 5488489 (-0.00%)
> >> instructions in affected programs: 1121 -> 1071 (-4.46%)
> >> helped:1
> >> HURT:  0
> >> GAINED:49
> >> LOST:  5
> >> 
> >> v2: Re-enable round-robin already for the lowest one of the nodes
> >> pushed optimistically onto the stack (Connor).
> >> ---
> >>  src/util/register_allocate.c | 23 ++-
> >>  1 file changed, 22 insertions(+), 1 deletion(-)
> >> 
> >> diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c
> >> index af7a20c..b1ed273 100644
> >> --- a/src/util/register_allocate.c
> >> +++ b/src/util/register_allocate.c
> >> @@ -168,6 +168,12 @@ struct ra_graph {
> >>  
> >> unsigned int *stack;
> >> unsigned int stack_count;
> >> +
> >> +   /**
> >> +* Tracks the start of the set of optimistically-colored registers in 
> >> the
> >> +* stack.
> >> +*/
> >> +   unsigned int stack_optimistic_start;
> >>  };
> >>  
> >>  /**
> >> @@ -454,6 +460,7 @@ static void
> >>  ra_simplify(struct ra_graph *g)
> >>  {
> >> bool progress = true;
> >> +   unsigned int stack_optimistic_start = ~0;
> >> int i;
> >>  
> >> while (progress) {
> >> @@ -483,12 +490,16 @@ ra_simplify(struct ra_graph *g)
> >>  
> >>if (!progress && best_optimistic_node != ~0U) {
> >> decrement_q(g, best_optimistic_node);
> >> + stack_optimistic_start =
> >> +MIN2(stack_optimistic_start, g->stack_count);
> >> g->stack[g->stack_count] = best_optimistic_node;
> >> g->stack_count++;
> >> g->nodes[best_optimistic_node].in_stack = t

Re: [Mesa-dev] [PATCH 2/2] radeon/compiler: include stdio.h

2015-02-26 Thread Tom Stellard

On Thu, Feb 26, 2015 at 12:34:56PM -0700, Brian Paul wrote:
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89343

Reviewed-by: Tom Stellard 
> ---
>  src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c | 1 
> +
>  1 file changed, 1 insertion(+)
> 
> diff --git 
> a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c 
> b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c
> index 3244d93..7672f68 100644
> --- a/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c
> +++ b/src/gallium/drivers/r300/compiler/tests/radeon_compiler_optimize_tests.c
> @@ -23,6 +23,7 @@
>   * Author: Tom Stellard 
>   */
>  
> +#include 
>  #include "radeon_compiler.h"
>  #include "radeon_dataflow.h"
>  
> -- 
> 1.9.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] clover: Don't unconditionally define cl_khr_fp64

2015-02-26 Thread Tom Stellard

This should be done by the frontend for devices that support this
extension.
---
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 508979a..9354812 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -203,7 +203,6 @@ namespace {
 
   // clc.h requires that this macro be defined:
   c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
-  c.getPreprocessorOpts().addMacroDef("cl_khr_fp64");
 
   c.getLangOpts().NoBuiltin = true;
   c.getTargetOpts().Triple = triple;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] clover: Enable cl_khr_fp64 for devices that support doubles v2

2015-02-26 Thread Tom Stellard

v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE and
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp64() to has_doubles().
---
 src/gallium/state_trackers/clover/api/device.cpp  | 6 +++---
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index e825468..217d2c3 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -283,7 +283,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = "";
+  buf.as_string() = dev.has_doubles() ? "cl_khr_fp64" : "";
   break;
 
case CL_DEVICE_PLATFORM:
@@ -315,7 +315,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 688a7dd..c3f3b4e 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -173,6 +173,12 @@ device::image_support() const {
   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
 }
 
+bool
+device::has_doubles() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vector
 device::max_block_size() const {
auto v = get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 2201700..de5fc6b 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -64,6 +64,7 @@ namespace clover {
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
   bool image_support() const;
+  bool has_doubles() const;
 
   std::vector max_block_size() const;
   std::string device_name() const;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] radeonsi/compute: Enable PIPE_SHADER_CAP_DOUBLES

2015-02-26 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_pipe.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 26182c2..c7a7622 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -360,8 +360,11 @@ static int si_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, enu
return PIPE_SHADER_IR_NATIVE;
 #endif
case PIPE_SHADER_CAP_DOUBLES:
-   return 0; /* XXX: Enable doubles once the compiler can
-handle them. */
+#if HAVE_LLVM >= 0x0307
+   return 1;
+#else
+   return 0;
+#endif
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
uint64_t max_const_buffer_size;
pscreen->get_compute_param(pscreen,
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: Add additional information to shader dumps

2015-02-26 Thread Tom Stellard

This adds SGPR count, VGPR count, shader size, LDS size, and scratch
usage to shader dumps.
---
 src/gallium/drivers/radeonsi/si_shader.c | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index e6849ad..c71bdf0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2604,17 +2604,23 @@ int si_shader_binary_read(struct si_screen *sscreen,
bool dump  = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
 
-   if (dump && !binary->disassembled) {
-   fprintf(stderr, "SI CODE:\n");
-   for (i = 0; i < binary->code_size; i+=4 ) {
-   fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, 
binary->code[i + 3],
+   si_shader_binary_read_config(sscreen, shader, 0);
+
+   if (dump) {
+   if (!binary->disassembled) {
+   fprintf(stderr, "SI CODE:\n");
+   for (i = 0; i < binary->code_size; i+=4 ) {
+   fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, 
binary->code[i + 3],
binary->code[i + 2], binary->code[i + 1],
binary->code[i]);
+   }
}
+   fprintf(stderr, "SGPRS: %d\nVGPRS: %d\nCode Size: %d 
bytes\nLDS: %d blocks\n"
+   "Scratch: %d bytes per wave\n",
+   shader->num_sgprs, shader->num_vgprs, binary->code_size,
+   shader->lds_size, shader->scratch_bytes_per_wave);
}
 
-   si_shader_binary_read_config(sscreen, shader, 0);
-
/* copy new shader */
code_size = binary->code_size + binary->rodata_size;
r600_resource_reference(&shader->bo, NULL);
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] configure: Leverage gcc warn options to enable safe use of C99 features where possible.

2015-02-27 Thread Tom Stellard

Hi,

This patch breaks the build for me:

CFLAGS="-g" CXXFLAGS="$CFLAGS" CC="ccache gcc" CXX="ccache g++"
./autogen.sh \
--prefix=/usr/local \
--with-dri-drivers="no" \
--with-gallium-drivers="r600,radeonsi" \
--enable-glx-tls \
--enable-debug \
--enable-shared-glapi \
--with-egl-platforms=x11,drm \
--enable-gallium-egl \
--enable-gallium-gbm \
--with-llvm-prefix=/usr/local/llvm/3.7 \
--enable-gallium-drm-loader \
--enable-gallium-compute-api \
--enable-opencl-icd \
--enable-opengl \
--disable-dri3 \
--enable-texture-float \
--with-llvm-shared-libs \
--enable-opencl \
--enable-gbm

  CC   glapi_libglapi_la-entry.lo
In file included from entry.c:49:0:
entry_x86-64_tls.h: In function 'entry_generate':
entry_x86-64_tls.h:105:29: error: pointer of type 'void *' used in arithmetic 
[-Werror=pointer-arith]
*((unsigned int *) (code + 5)) = addr;
 ^
cc1: some warnings being treated as errors
Makefile:1425: recipe for target 'shared_glapi_libglapi_la-entry.lo'
failed


On Fri, Feb 27, 2015 at 07:59:47AM -0800, Ian Romanick wrote:
> I like the idea as it should prevent future thrash.  There are a couple
> comments below.
> 
> On 02/26/2015 08:51 AM, Jose Fonseca wrote:
> > The main objective of this change is to enable Linux developers to use
> > more of C99 throughout Mesa, with confidence that the portions that need
> > to be built with MSVC -- and only those portions --, stay portable.
> > 
> > This is achieved by using the appropriate -Werror= options only on the
> > places they need to be used.
> > 
> > Unfortunately we still need MSVC 2008 on a few portions of the code
> > (namely llvmpipe and its dependencies).  I hope to eventually eliminate
> > this so that we can use C99 everywhere, but there are technical/logistic
> > challenges (specifically, newer Windows SDKs no longer bundle MSVC,
> > instead require a full installation of Visual Studio, and that has
> > hindered adoption of newer MSVC versions on our build processes.)
> > Thankfully we have more direct control over our OpenGL driver, which is
> > why we're now able to migrate to MSVC 2013 for most of the tree.
> > ---
> >  configure.ac   | 17 +
> >  src/egl/main/Makefile.am   |  1 +
> >  src/gallium/auxiliary/Makefile.am  |  7 +--
> >  src/gallium/drivers/llvmpipe/Makefile.am   |  6 --
> >  src/gallium/state_trackers/egl/Makefile.am |  3 ++-
> >  src/gallium/targets/egl-static/Makefile.am |  3 ++-
> >  src/glsl/Makefile.am   |  8 ++--
> >  src/loader/Makefile.am |  1 +
> >  src/mapi/Makefile.am   |  4 +++-
> >  src/mesa/Makefile.am   | 10 --
> >  src/util/Makefile.am   |  3 ++-
> >  11 files changed, 51 insertions(+), 12 deletions(-)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index 5fbb7bc..22dc023 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -263,6 +263,18 @@ if test "x$GCC" = xyes; then
> >  # gcc's builtin memcmp is slower than glibc's
> >  # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> >  CFLAGS="$CFLAGS -fno-builtin-memcmp"
> > +
> > +# Flags to help ensure that certain portions of the code -- and only 
> > those
> > +# portions -- can be built with MSVC:
> > +# - src/util, src/gallium/auxiliary, and src/gallium/drivers/llvmpipe 
> > needs
> > +#   to build with Windows SDK 7.0.7600, which bundles MSVC 2008
> > +# - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
> > +#   supports most of C99)
> > +# - the rest has no compiler restrictions
> > +MSVC2013_COMPAT_CFLAGS="-Werror=vla -Werror=pointer-arith"
> > +MSVC2013_COMPAT_CXXFLAGS="-Werror=vla -Werror=pointer-arith"
> > +MSVC2008_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS 
> > -Werror=declaration-after-statement"
> > +MSVC2008_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS"
> >  fi
> >  if test "x$GXX" = xyes; then
> >  CXXFLAGS="$CXXFLAGS -Wall"
> > @@ -288,6 +300,11 @@ if test "x$GXX" = xyes; then
> >  CXXFLAGS="$CXXFLAGS -fno-builtin-memcmp"
> >  fi
> >  
> > +AC_SUBST([MSVC2013_COMPAT_CFLAGS])
> > +AC_SUBST([MSVC2013_COMPAT_CXXFLAGS])
> > +AC_SUBST([MSVC2008_COMPAT_CFLAGS])
> > +AC_SUBST([MSVC2008_COMPAT_CXXFLAGS])
> > +
> >  dnl even if the compiler appears to support it, using visibility 
> > attributes isn't
> >  dnl going to do anything useful currently on cygwin apart from emit lots 
> > of warnings
> >  case "$host_os" in
> > diff --git a/src/egl/main/Makefile.am b/src/egl/main/Makefile.am
> > index d21d8a9..a4db210 100644
> > --- a/src/egl/main/Makefile.am
> > +++ b/src/egl/main/Makefile.am
> > @@ -26,6 +26,7 @@ AM_CFLAGS = \
> > -I$(top_srcdir)/src/gbm/main \
> > $(DEFINES) \
> > $(VISIBILITY_CFLAGS) \
> > +   $(MSVC2013_COMPAT_CFLAGS) \
> > $(EGL_CFLAGS) \
> > -D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
> > -D_EGL_DRIV

[Mesa-dev] [PATCH] egl: Report correct GBM formats

2015-03-02 Thread Tom Stellard

From: Daniel Stone 

This fixes almost all piglit regressions when running with
PIGLIT_PLATFORM=gbm

Tom Stellard:
  - Fix ARGB2101010 format

Cc: "10.4 10.5" 
---

I'm not sure if the commit message makes sense, I'm open to suggestions.

src/egl/drivers/dri2/platform_drm.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index 02e87f7..bf205be 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -668,15 +668,21 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
 
for (i = 0; dri2_dpy->driver_configs[i]; i++) {
   EGLint format, attr_list[3];
-  unsigned int mask;
+  unsigned int red, alpha;
 
   dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
-   __DRI_ATTRIB_RED_MASK, &mask);
-  if (mask == 0x3ff00000)
+   __DRI_ATTRIB_RED_MASK, &red);
+  dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
+   __DRI_ATTRIB_ALPHA_MASK, &alpha);
+  if (red == 0x3ff00000 && alpha == 0x00000000)
   format = GBM_FORMAT_XRGB2101010;
-  else if (mask == 0x00ff0000)
+  else if (red == 0x3ff00000 && alpha == 0xc0000000)
+ format = GBM_FORMAT_ARGB2101010;
+  else if (red == 0x00ff0000 && alpha == 0x00000000)
   format = GBM_FORMAT_XRGB8888;
-  else if (mask == 0xf800)
+  else if (red == 0x00ff0000 && alpha == 0xff000000)
+ format = GBM_FORMAT_ARGB8888;
+  else if (red == 0xf800)
  format = GBM_FORMAT_RGB565;
   else
  continue;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] egl: Report correct GBM formats

2015-03-02 Thread Tom Stellard

From: Daniel Stone 

This fixes almost all piglit regressions when running with
PIGLIT_PLATFORM=gbm

Tom Stellard:
  - Fix ARGB2101010 format

Cc: "10.4 10.5" 
---

I'm not sure if the commit message makes sense, I'm open to suggestions.

src/egl/drivers/dri2/platform_drm.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/egl/drivers/dri2/platform_drm.c 
b/src/egl/drivers/dri2/platform_drm.c
index 02e87f7..bf205be 100644
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -668,15 +668,21 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
 
for (i = 0; dri2_dpy->driver_configs[i]; i++) {
   EGLint format, attr_list[3];
-  unsigned int mask;
+  unsigned int red, alpha;
 
   dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
-   __DRI_ATTRIB_RED_MASK, &mask);
-  if (mask == 0x3ff00000)
+   __DRI_ATTRIB_RED_MASK, &red);
+  dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
+   __DRI_ATTRIB_ALPHA_MASK, &alpha);
+  if (red == 0x3ff00000 && alpha == 0x00000000)
   format = GBM_FORMAT_XRGB2101010;
-  else if (mask == 0x00ff0000)
+  else if (red == 0x3ff00000 && alpha == 0xc0000000)
+ format = GBM_FORMAT_ARGB2101010;
+  else if (red == 0x00ff0000 && alpha == 0x00000000)
   format = GBM_FORMAT_XRGB8888;
-  else if (mask == 0xf800)
+  else if (red == 0x00ff0000 && alpha == 0xff000000)
+ format = GBM_FORMAT_ARGB8888;
+  else if (red == 0xf800)
  format = GBM_FORMAT_RGB565;
   else
  continue;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] clover: Enable cl_khr_fp64 for devices that support doubles v3

2015-03-02 Thread Tom Stellard

v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp64() to
has_doubles().

v3:
  - Return 0 for device::double_fp_config() when doubles aren't
supported.
---
 src/gallium/state_trackers/clover/api/device.cpp  | 10 +++---
 src/gallium/state_trackers/clover/core/device.cpp | 21 +
 src/gallium/state_trackers/clover/core/device.hpp |  2 ++
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index e8750e8..a7ea34a 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -204,6 +204,10 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   buf.as_scalar() = dev.single_fp_config();
   break;
 
+   case CL_DEVICE_DOUBLE_FP_CONFIG:
+  buf.as_scalar() = dev.double_fp_config();
+  break;
+
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
   buf.as_scalar() = CL_NONE;
   break;
@@ -282,7 +286,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = "";
+  buf.as_string() = dev.has_doubles() ? "cl_khr_fp64" : "";
   break;
 
case CL_DEVICE_PLATFORM:
@@ -314,7 +318,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 34f0372..56f3570 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -179,6 +179,27 @@ device::single_fp_config() const {
return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
 }
 
+cl_device_fp_config
+device::double_fp_config() const {
+   if (!has_doubles())
+  return 0;
+
+   // TODO: Get these from somewhere. This is the "mandated minimum double
+   // precision floating-point capability"
+   return CL_FP_FMA
+   | CL_FP_ROUND_TO_NEAREST
+   | CL_FP_ROUND_TO_ZERO
+   | CL_FP_ROUND_TO_INF
+   | CL_FP_INF_NAN
+   | CL_FP_DENORM;
+}
+
+bool
+device::has_doubles() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vector
 device::max_block_size() const {
auto v = get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 6d3c2c3..3d08873 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -65,6 +65,8 @@ namespace clover {
   cl_uint max_compute_units() const;
   bool image_support() const;
   cl_device_fp_config single_fp_config() const;
+  cl_device_fp_config double_fp_config() const;
+  bool has_doubles() const;
 
   std::vector max_block_size() const;
   std::string device_name() const;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] clover: Report a default value for CL_DEVICE_SINGLE_FP_CONFIG

2015-03-02 Thread Tom Stellard

---
 src/gallium/state_trackers/clover/api/device.cpp  | 3 +--
 src/gallium/state_trackers/clover/core/device.cpp | 6 ++
 src/gallium/state_trackers/clover/core/device.hpp | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index e825468..e8750e8 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
-  buf.as_scalar() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+  buf.as_scalar() = dev.single_fp_config();
   break;
 
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 688a7dd..34f0372 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -173,6 +173,12 @@ device::image_support() const {
   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
 }
 
+cl_device_fp_config
+device::single_fp_config() const {
+   // TODO: Get these from somewhere.
+   return CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+}
+
 std::vector
 device::max_block_size() const {
auto v = get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 2201700..6d3c2c3 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -64,6 +64,7 @@ namespace clover {
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
   bool image_support() const;
+  cl_device_fp_config single_fp_config() const;
 
   std::vector max_block_size() const;
   std::string device_name() const;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/9] gallium/radeon: don't use LLVMReadOnlyAttribute for ALU

2015-03-02 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:15PM +0100, Marek Olšák wrote:
> From: Marek Olšák 
> 

For some reason I thought doing this would require changes to LLVM,
but I guess I was wrong.

Reviewed-by: Tom Stellard 

> None of the instructions use a pointer argument.
> (+ small cosmetic changes)
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 25 
> --
>  1 file changed, 9 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index dce5b55..94ef675 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1224,6 +1224,7 @@ static void build_tgsi_intrinsic(
> emit_data->dst_type, emit_data->args,
> emit_data->arg_count, attr);
>  }
> +
>  void
>  build_tgsi_intrinsic_nomem(
>   const struct lp_build_tgsi_action * action,
> @@ -1233,14 +1234,6 @@ build_tgsi_intrinsic_nomem(
>   build_tgsi_intrinsic(action, bld_base, emit_data, 
> LLVMReadNoneAttribute);
>  }
>  
> -static void build_tgsi_intrinsic_readonly(
> - const struct lp_build_tgsi_action * action,
> - struct lp_build_tgsi_context * bld_base,
> - struct lp_build_emit_data * emit_data)
> -{
> - build_tgsi_intrinsic(action, bld_base, emit_data, 
> LLVMReadOnlyAttribute);
> -}
> -
>  void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
>  {
>   struct lp_type type;
> @@ -1295,20 +1288,20 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>  
>   lp_set_default_actions(bld_base);
>  
> - bld_base->op_actions[TGSI_OPCODE_ABS].emit = 
> build_tgsi_intrinsic_readonly;
> + bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "fabs";
> - bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
>   bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
> + bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
>   bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
>   bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
> - bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> build_tgsi_intrinsic_readonly;
> + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
>   bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name = "llvm.AMDIL.clamp.";
>   bld_base->op_actions[TGSI_OPCODE_CMP].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt";
>   bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
> - bld_base->op_actions[TGSI_OPCODE_COS].emit = 
> build_tgsi_intrinsic_readonly;
> + bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
>   bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
>   bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
> @@ -1319,7 +1312,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
>   bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
> - bld_base->op_actions[TGSI_OPCODE_FLR].emit = 
> build_tgsi_intrinsic_readonly;
> + bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
>   bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = 
> "llvm.AMDIL.fraction.";
> @@ -1348,14 +1341,14 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = 
> "llvm.AMDGPU.kill";
>   bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
>   bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
> - bld_base->op_actions[TGSI_OPCODE_LG2].emit = 
> build_tgsi_intrinsic_readonly;
> + bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_LG2].intr_

Re: [Mesa-dev] [PATCH 2/9] radeonsi: use V_BFE for extracting a sample index

2015-03-02 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:16PM +0100, Marek Olšák wrote:
> From: Marek Olšák 
> 
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 22 --
>  1 file changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index b0417ed..f125483 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -192,6 +192,20 @@ static int get_param_index(unsigned semantic_name, 
> unsigned index,
>  }
>  
>  /**
> + * BitField Extract: ((value >> rshift) & ((1 << bitwidth) - 1))
> + */

Ideally, we would just add a pattern for this in the backend and emit generic
LLVM IR here.  This would also make it possible to share the code with llvmpipe.

I think the best place to do this would be in 
AMDGPUTargetLowering::performDAGCombine().

-Tom

> +static LLVMValueRef build_bfe(struct gallivm_state *gallivm,
> +   LLVMValueRef value, LLVMValueRef rshift,
> +   LLVMValueRef bitwidth)
> +{
> + LLVMValueRef args[3] = {value, rshift, bitwidth};
> +
> + return build_intrinsic(gallivm->builder, "llvm.AMDGPU.bfe.u32",
> +LLVMInt32TypeInContext(gallivm->context),
> +args, Elements(args), LLVMReadNoneAttribute);
> +}
> +
> +/**
>   * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
>   * It's equivalent to doing a load from &base_ptr[index].
>   *
> @@ -1721,7 +1735,6 @@ static void tex_fetch_args(
>  
>   /* Initialize some constants. */
>   LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
> - LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
>  
>   /* Apply the formula. */
>   LLVMValueRef fmask =
> @@ -1734,11 +1747,8 @@ static void tex_fetch_args(
>   LLVMValueRef sample_index4 =
>   LLVMBuildMul(gallivm->builder, address[sample_chan], 
> four, "");
>  
> - LLVMValueRef shifted_fmask =
> - LLVMBuildLShr(gallivm->builder, fmask, sample_index4, 
> "");
> -
> - LLVMValueRef final_sample =
> - LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
> + LLVMValueRef final_sample = build_bfe(gallivm, fmask,
> +   sample_index4, four);
>  
>   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the 
> FMASK
>* resource descriptor is 0 (invalid),
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/9] radeonsi: use V_BFE for extracting a sample index

2015-03-02 Thread Tom Stellard

On Mon, Mar 02, 2015 at 10:14:00PM +0100, Marek Olšák wrote:
> On Mon, Mar 2, 2015 at 10:05 PM, Tom Stellard  wrote:
> > On Mon, Mar 02, 2015 at 12:54:16PM +0100, Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >> ---
> >>  src/gallium/drivers/radeonsi/si_shader.c | 22 --
> >>  1 file changed, 16 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> >> b/src/gallium/drivers/radeonsi/si_shader.c
> >> index b0417ed..f125483 100644
> >> --- a/src/gallium/drivers/radeonsi/si_shader.c
> >> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> >> @@ -192,6 +192,20 @@ static int get_param_index(unsigned semantic_name, 
> >> unsigned index,
> >>  }
> >>
> >>  /**
> >> + * BitField Extract: ((value >> rshift) & ((1 << bitwidth) - 1))
> >> + */
> >
> > Ideally, we would just add a pattern for this in the backend and emit 
> > generic
> > LLVM IR here.  This would also make it possible to share the code with 
> > llvmpipe.
> >
> > I think the best place to do this would be in 
> > AMDGPUTargetLowering::performDAGCombine().
> 
> Why not SIInstructions.td?
> 

Because for patterns like this, I think it is important to match them as
early as possible, because there may be another optimization which reduces
the sequence from 5 to 4 instructions which would cause the pattern not to 
match.

-Tom

> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/2] r300g: Use PATH_MAX instead of limiting ourselves to 100 chars.

2015-03-03 Thread Tom Stellard

On Tue, Mar 03, 2015 at 04:12:56PM -0800, Matt Turner wrote:
> When built with Gentoo's package manager, the Mesa source directory
> exists seven directories deep. The path to the .test file is too long
> and is silently truncated, leading to a crash. Just use PATH_MAX.
> 
> Cc: 10.4, 10.5 
> Bugzilla: https://bugs.gentoo.org/show_bug.cgi?id=540970

Both patches:

Reviewed-by: Tom Stellard 

Thanks!

> ---
>  src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c 
> b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
> index 7c9d177..422bdb0 100644
> --- a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
> +++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
> @@ -28,6 +28,7 @@
>   */
>  
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -528,7 +529,6 @@ void init_compiler(
>  }
>  
>  #define MAX_LINE_LENGTH 100
> -#define MAX_PATH_LENGTH 100
>  
>  unsigned load_program(
>   struct radeon_compiler *c,
> @@ -536,14 +536,14 @@ unsigned load_program(
>   const char *filename)
>  {
>   char line[MAX_LINE_LENGTH];
> - char path[MAX_PATH_LENGTH];
> + char path[PATH_MAX];
>   FILE *file;
>   unsigned *count;
>   char **string_store;
>   unsigned i = 0;
>  
>   memset(line, 0, sizeof(line));
> - snprintf(path, MAX_PATH_LENGTH, TEST_PATH "/%s", filename);
> + snprintf(path, PATH_MAX, TEST_PATH "/%s", filename);
>   file = fopen(path, "r");
>   if (!file) {
>   return 0;
> -- 
> 2.0.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] clover: Enable cl_khr_fp64 for devices that support doubles v4

2015-03-04 Thread Tom Stellard

v2:
  - Report correct values for CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE
and CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE.
  - Only define cl_khr_fp64 if the extension is supported.
  - Remove trailing space from extension string.
  - Rename device query function from cl_khr_fp64() to
has_doubles().

v3:
  - Return 0 for device::double_fp_config() when doubles aren't
supported.

v4:
  - Remove device query for double fp_config.
---
 src/gallium/state_trackers/clover/api/device.cpp  | 21 ++---
 src/gallium/state_trackers/clover/core/device.cpp |  6 ++
 src/gallium/state_trackers/clover/core/device.hpp |  1 +
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index e825468..b1f556f 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -145,7 +145,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
@@ -205,6 +205,21 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
  CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
   break;
 
+   case CL_DEVICE_DOUBLE_FP_CONFIG:
+  if (dev.has_doubles())
+ // This is the "mandated minimum double precision floating-point
+ // capability"
+ buf.as_scalar() =
+   CL_FP_FMA
+ | CL_FP_ROUND_TO_NEAREST
+ | CL_FP_ROUND_TO_ZERO
+ | CL_FP_ROUND_TO_INF
+ | CL_FP_INF_NAN
+ | CL_FP_DENORM;
+  else
+ buf.as_scalar() = 0;
+  break;
+
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
   buf.as_scalar() = CL_NONE;
   break;
@@ -283,7 +298,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_EXTENSIONS:
-  buf.as_string() = "";
+  buf.as_string() = dev.has_doubles() ? "cl_khr_fp64" : "";
   break;
 
case CL_DEVICE_PLATFORM:
@@ -315,7 +330,7 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
-  buf.as_scalar() = 2;
+  buf.as_scalar() = dev.has_doubles() ? 2 : 0;
   break;
 
case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
diff --git a/src/gallium/state_trackers/clover/core/device.cpp 
b/src/gallium/state_trackers/clover/core/device.cpp
index 688a7dd..c3f3b4e 100644
--- a/src/gallium/state_trackers/clover/core/device.cpp
+++ b/src/gallium/state_trackers/clover/core/device.cpp
@@ -173,6 +173,12 @@ device::image_support() const {
   PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];
 }
 
+bool
+device::has_doubles() const {
+   return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,
+ PIPE_SHADER_CAP_DOUBLES);
+}
+
 std::vector
 device::max_block_size() const {
auto v = get_compute_param(pipe, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
diff --git a/src/gallium/state_trackers/clover/core/device.hpp 
b/src/gallium/state_trackers/clover/core/device.hpp
index 2201700..de5fc6b 100644
--- a/src/gallium/state_trackers/clover/core/device.hpp
+++ b/src/gallium/state_trackers/clover/core/device.hpp
@@ -64,6 +64,7 @@ namespace clover {
   cl_uint max_clock_frequency() const;
   cl_uint max_compute_units() const;
   bool image_support() const;
+  bool has_doubles() const;
 
   std::vector max_block_size() const;
   std::string device_name() const;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/9] radeonsi: use V_BFE for extracting a sample index

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 02:09:29PM -0800, Matt Arsenault wrote:
> 
> > On Mar 2, 2015, at 1:19 PM, Tom Stellard  wrote:
> > 
> > On Mon, Mar 02, 2015 at 10:14:00PM +0100, Marek Olšák wrote:
> >> On Mon, Mar 2, 2015 at 10:05 PM, Tom Stellard  wrote:
> >>> On Mon, Mar 02, 2015 at 12:54:16PM +0100, Marek Olšák wrote:
> >>>> From: Marek Olšák 
> >>>> 
> >>>> ---
> >>>> src/gallium/drivers/radeonsi/si_shader.c | 22 --
> >>>> 1 file changed, 16 insertions(+), 6 deletions(-)
> >>>> 
> >>>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> >>>> b/src/gallium/drivers/radeonsi/si_shader.c
> >>>> index b0417ed..f125483 100644
> >>>> --- a/src/gallium/drivers/radeonsi/si_shader.c
> >>>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> >>>> @@ -192,6 +192,20 @@ static int get_param_index(unsigned semantic_name, 
> >>>> unsigned index,
> >>>> }
> >>>> 
> >>>> /**
> >>>> + * BitField Extract: ((value >> rshift) & ((1 << bitwidth) - 1))
> >>>> + */
> >>> 
> >>> Ideally, we would just add a pattern for this in the backend and emit 
> >>> generic
> >>> LLVM IR here.  This would also make it possible to share the code with 
> >>> llvmpipe.
> >>> 
> >>> I think the best place to do this would be in 
> >>> AMDGPUTargetLowering::performDAGCombine().
> >> 
> >> Why not SIInstructions.td?
> >> 
> > 
> > Because for patterns like this, I think it is important to match them as
> > early as possible, because there may be another optimization which reduces
> > the sequence from 5 to 4 instructions which would cause the pattern not to 
> > match.
> > 
> > -Tom
> 
> 
> I think the opposite in this case. The basic bit operations have a lot of 
> existing combines on them, and the computeKnownBits implementations are more 
> complete. The BFE nodes are not as well understood, and trickier to deal 
> with. AArch64 and NVPTX both have essentially the same instruction, and they 
> have a large bit of code to match them in their ISelDAGToDAGs. I’ve wanted to 
> add a generic BFE node to be matched after legalization, but I haven’t had 
> time to do it.
> 

Isn't there a potential for a DAG combine on bit operations, to 'break'
the pattern so it can't be recognized?

-Tom

> -Matt
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/9] radeonsi: add support for SQRT

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:20PM +0100, Marek Olšák wrote:
> From: Marek Olšák 
> 

Reviewed-by: Tom Stellard 

> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 ++
>  src/gallium/drivers/radeonsi/si_pipe.c  | 2 +-
>  2 files changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 8026723..385d3ad 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1363,6 +1363,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_cmp;
>   bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
> + bld_base->op_actions[TGSI_OPCODE_SQRT].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
>   bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
>   bld_base->op_actions[TGSI_OPCODE_TEX].fetch_args = tex_fetch_args;
>   bld_base->op_actions[TGSI_OPCODE_TEX].intr_name = "llvm.AMDGPU.tex";
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 87eeac6..993b153 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -403,7 +403,7 @@ static int si_get_shader_param(struct pipe_screen* 
> pscreen, unsigned shader, enu
>   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
>   return 1;
>   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> - return 0;
> + return 1;
>   case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
>   /* Indirection of geometry shader input dimension is not
>* handled yet
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/9] radeonsi: add support for FMA

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:19PM +0100, Marek Olšák wrote:
> From: Marek Olšák 

Reviewed-by: Tom Stellard 
> 
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 ++
>  src/gallium/drivers/radeonsi/si_pipe.c  | 3 ++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 94ef675..8026723 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1314,6 +1314,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
>   bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
> + bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
>   bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = 
> "llvm.AMDIL.fraction.";
>   bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 0aacab1..87eeac6 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -425,8 +425,9 @@ static int si_get_shader_param(struct pipe_screen* 
> pscreen, unsigned shader, enu
>   case PIPE_SHADER_CAP_DOUBLES:
>   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
>   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
> - case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
>   return 0;
> + case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
> + return 1;
>   }
>   return 0;
>  }
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] radeonsi: use S_BFE/V_BFE for extracting bitfields from parameters

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:17PM +0100, Marek Olšák wrote:
> From: Marek Olšák 

Reviewed-by: Tom Stellard 
> 
> And use AND/OR in special cases.
> 
> This universal helper will be used a lot (especially by tessellation).
> ---
>  src/gallium/drivers/radeonsi/si_shader.c | 48 
> +---
>  1 file changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index f125483..085a350 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -206,6 +206,35 @@ static LLVMValueRef build_bfe(struct gallivm_state 
> *gallivm,
>  }
>  
>  /**
> + * Get the value of a shader input parameter and extract a bitfield.
> + */
> +static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
> +  unsigned param, unsigned rshift,
> +  unsigned bitwidth)
> +{
> + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
> + LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
> +   param);
> +
> + if (rshift) {
> + if (rshift + bitwidth < 32)
> + return build_bfe(gallivm, value,
> +  lp_build_const_int32(gallivm, rshift),
> +  lp_build_const_int32(gallivm, 
> bitwidth));
> + else
> + return LLVMBuildLShr(gallivm->builder, value,
> +  lp_build_const_int32(gallivm, 
> rshift), "");
> + } else {
> + if (bitwidth < 32) {
> + unsigned mask = (1 << bitwidth) - 1;
> + return LLVMBuildAnd(gallivm->builder, value,
> + lp_build_const_int32(gallivm, 
> mask), "");
> + } else
> + return value;
> + }
> +}
> +
> +/**
>   * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
>   * It's equivalent to doing a load from &base_ptr[index].
>   *
> @@ -575,14 +604,8 @@ static void declare_input_fs(
>  
>  static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
>  {
> - struct gallivm_state *gallivm = &radeon_bld->gallivm;
> - LLVMValueRef value = LLVMGetParam(radeon_bld->main_fn,
> -   SI_PARAM_ANCILLARY);
> - value = LLVMBuildLShr(gallivm->builder, value,
> -   lp_build_const_int32(gallivm, 8), "");
> - value = LLVMBuildAnd(gallivm->builder, value,
> -  lp_build_const_int32(gallivm, 0xf), "");
> - return value;
> + return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
> + SI_PARAM_ANCILLARY, 8, 4);
>  }
>  
>  /**
> @@ -990,16 +1013,9 @@ static void si_llvm_emit_streamout(struct 
> si_shader_context *shader,
>  
>   LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
>  
> - LLVMValueRef so_param =
> - LLVMGetParam(shader->radeon_bld.main_fn,
> -  shader->param_streamout_config);
> -
>   /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
>   LLVMValueRef so_vtx_count =
> - LLVMBuildAnd(builder,
> -  LLVMBuildLShr(builder, so_param,
> -LLVMConstInt(i32, 16, 0), ""),
> -  LLVMConstInt(i32, 127, 0), "");
> + unpack_param(shader, shader->param_streamout_config, 16, 7);
>  
>   LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
>  NULL, 0, LLVMReadNoneAttribute);
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] radeonsi: add support for easy opcodes from ARB_gpu_shader5

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:21PM +0100, Marek Olšák wrote:
> From: Marek Olšák 
> 

I'm still unsure whether it's better to use intrinsics or LLVM IR
to implement these.  I will think about this some more.

-Tom

> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 385d3ad..034095f 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1293,6 +1293,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
>   bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
>   bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> + bld_base->op_actions[TGSI_OPCODE_BREV].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
>   bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
>   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
> @@ -1326,6 +1328,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
>   bld_base->op_actions[TGSI_OPCODE_IABS].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
> + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = 
> "llvm.AMDGPU.bfe.i32";
>   bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
>   bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
>   bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
> @@ -1350,6 +1354,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
>   bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
>   bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
> + bld_base->op_actions[TGSI_OPCODE_POPC].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
>   bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
>   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = 
> build_tgsi_intrinsic_nomem;
> @@ -1389,6 +1395,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
>   bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
> + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = 
> "llvm.AMDGPU.bfe.u32";
>   bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
>   bld_base->op_actions[TGSI_OPCODE_UMAX].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] clover: Return the minimum required value for CL_DEVICE_SINGLE_FP_CONFIG

2015-03-05 Thread Tom Stellard

This means dropping CL_FP_DENORM from the current return value.
---
 src/gallium/state_trackers/clover/api/device.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index b1f556f..db3b931 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,10 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
+  // This is the "mandated minimum single precision floating-point
+  // capability"
   buf.as_scalar() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+ CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
   break;
 
case CL_DEVICE_DOUBLE_FP_CONFIG:
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/9] radeonsi: use V_BFE for extracting a sample index

2015-03-05 Thread Tom Stellard

On Thu, Mar 05, 2015 at 05:14:09PM +0100, Marek Olšák wrote:
> Since you acked patch #3, which depends on this, I assume this has your Rb 
> too?
> 

No, I still want to resolve with Matt what to do about intrinsics vs
IR.

-Tom

> Marek
> 
> On Mon, Mar 2, 2015 at 10:05 PM, Tom Stellard  wrote:
> > On Mon, Mar 02, 2015 at 12:54:16PM +0100, Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >> ---
> >>  src/gallium/drivers/radeonsi/si_shader.c | 22 --
> >>  1 file changed, 16 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> >> b/src/gallium/drivers/radeonsi/si_shader.c
> >> index b0417ed..f125483 100644
> >> --- a/src/gallium/drivers/radeonsi/si_shader.c
> >> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> >> @@ -192,6 +192,20 @@ static int get_param_index(unsigned semantic_name, 
> >> unsigned index,
> >>  }
> >>
> >>  /**
> >> + * BitField Extract: ((value >> rshift) & ((1 << bitwidth) - 1))
> >> + */
> >
> > Ideally, we would just add a pattern for this in the backend and emit 
> > generic
> > LLVM IR here.  This would also make it possible to share the code with 
> > llvmpipe.
> >
> > I think the best place to do this would be in 
> > AMDGPUTargetLowering::performDAGCombine().
> >
> > -Tom
> >
> >> +static LLVMValueRef build_bfe(struct gallivm_state *gallivm,
> >> +   LLVMValueRef value, LLVMValueRef rshift,
> >> +   LLVMValueRef bitwidth)
> >> +{
> >> + LLVMValueRef args[3] = {value, rshift, bitwidth};
> >> +
> >> + return build_intrinsic(gallivm->builder, "llvm.AMDGPU.bfe.u32",
> >> +LLVMInt32TypeInContext(gallivm->context),
> >> +args, Elements(args), LLVMReadNoneAttribute);
> >> +}
> >> +
> >> +/**
> >>   * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
> >>   * It's equivalent to doing a load from &base_ptr[index].
> >>   *
> >> @@ -1721,7 +1735,6 @@ static void tex_fetch_args(
> >>
> >>   /* Initialize some constants. */
> >>   LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
> >> - LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
> >>
> >>   /* Apply the formula. */
> >>   LLVMValueRef fmask =
> >> @@ -1734,11 +1747,8 @@ static void tex_fetch_args(
> >>   LLVMValueRef sample_index4 =
> >>   LLVMBuildMul(gallivm->builder, address[sample_chan], 
> >> four, "");
> >>
> >> - LLVMValueRef shifted_fmask =
> >> - LLVMBuildLShr(gallivm->builder, fmask, 
> >> sample_index4, "");
> >> -
> >> - LLVMValueRef final_sample =
> >> - LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
> >> + LLVMValueRef final_sample = build_bfe(gallivm, fmask,
> >> +   sample_index4, four);
> >>
> >>   /* Don't rewrite the sample index if WORD1.DATA_FORMAT of 
> >> the FMASK
> >>* resource descriptor is 0 (invalid),
> >> --
> >> 2.1.0
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] radeonsi: add support for easy opcodes from ARB_gpu_shader5

2015-03-05 Thread Tom Stellard

On Mon, Mar 02, 2015 at 12:54:21PM +0100, Marek Olšák wrote:
> From: Marek Olšák 
> 

Hi Marek,

After discussing with Matt, I think we should use LLVM IR rather than
intrinsics for IBFE and UBFE and then add patterns for them either in
the TableGen Files or AMDGPUISelDAGToDAG.cpp.

Using intrinsics for BREV and POPC is fine though.

-Tom

> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 385d3ad..034095f 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1293,6 +1293,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
>   bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
>   bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> + bld_base->op_actions[TGSI_OPCODE_BREV].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.AMDGPU.brev";
>   bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
>   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
> @@ -1326,6 +1328,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
>   bld_base->op_actions[TGSI_OPCODE_IABS].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
> + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = 
> "llvm.AMDGPU.bfe.i32";
>   bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
>   bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
>   bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
> @@ -1350,6 +1354,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
>   bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
>   bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
> + bld_base->op_actions[TGSI_OPCODE_POPC].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
>   bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
>   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = 
> build_tgsi_intrinsic_nomem;
> @@ -1389,6 +1395,8 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc";
>   bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
> + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = 
> build_tgsi_intrinsic_nomem;
> + bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = 
> "llvm.AMDGPU.bfe.u32";
>   bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
>   bld_base->op_actions[TGSI_OPCODE_UMAX].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax";
> -- 
> 2.1.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] clover: Return the minimum required value for CL_DEVICE_SINGLE_FP_CONFIG

2015-03-06 Thread Tom Stellard

On Thu, Mar 05, 2015 at 08:42:25PM +0200, Francisco Jerez wrote:
> Tom Stellard  writes:
> 
> > This means dropping CL_FP_DENORM from the current return value.
> > ---
> >  src/gallium/state_trackers/clover/api/device.cpp | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
> > b/src/gallium/state_trackers/clover/api/device.cpp
> > index b1f556f..db3b931 100644
> > --- a/src/gallium/state_trackers/clover/api/device.cpp
> > +++ b/src/gallium/state_trackers/clover/api/device.cpp
> > @@ -201,8 +201,10 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info 
> > param,
> >break;
> >  
> > case CL_DEVICE_SINGLE_FP_CONFIG:
> > +  // This is the "mandated minimum single precision floating-point
> > +  // capability"
> 
> Could you add that this is according to the OpenCL 1.1 specification?
> OpenCL 1.2 is even weaker (CL_FP_INF_NAN is not required, only one of
> CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST is required, and no FP
> capabilities at all are required for custom devices as Jan pointed out).
> 
> >buf.as_scalar() =
> > - CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
> > + CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
> 
> I'm okay with this change, but I'm curious, is this motivated by your
> architecture not supporting denorms?
> 

It can, but supporting them hurts performance.

-Tom

> >break;
> >  
> > case CL_DEVICE_DOUBLE_FP_CONFIG:
> > -- 
> > 2.0.4




> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radeonsi/compute: Use value from compiler for COMPUTE_PGM_RSRC1.FLOAT_MODE

2015-03-06 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_compute.c | 3 ++-
 src/gallium/drivers/radeonsi/si_shader.c  | 1 +
 src/gallium/drivers/radeonsi/si_shader.h  | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 5009f69..8609b89 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -377,7 +377,8 @@ static void si_launch_grid(
 * XXX: The compiler should account for this.
 */
|  S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
-   shader->num_sgprs)) - 1) / 8))
+   shader->num_sgprs)) - 1) / 8)
+   |  S_00B028_FLOAT_MODE(shader->float_mode))
;
 
lds_blocks = shader->lds_size;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b0417ed..87aef4d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2546,6 +2546,7 @@ void si_shader_binary_read_config(const struct si_screen 
*sscreen,
case R_00B848_COMPUTE_PGM_RSRC1:
shader->num_sgprs = MAX2(shader->num_sgprs, 
(G_00B028_SGPRS(value) + 1) * 8);
shader->num_vgprs = MAX2(shader->num_vgprs, 
(G_00B028_VGPRS(value) + 1) * 4);
+   shader->float_mode =  G_00B028_FLOAT_MODE(value);
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
shader->lds_size = MAX2(shader->lds_size, 
G_00B02C_EXTRA_LDS_SIZE(value));
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 551c7dc..4f2bb91 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -149,6 +149,7 @@ struct si_shader {
unsignednum_vgprs;
unsignedlds_size;
unsignedspi_ps_input_ena;
+   unsignedfloat_mode;
unsignedscratch_bytes_per_wave;
unsignedspi_shader_col_format;
unsignedspi_shader_z_format;
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] clover: Return the minimum required value for CL_DEVICE_SINGLE_FP_CONFIG v2

2015-03-06 Thread Tom Stellard

This means dropping CL_FP_DENORM from the current return value.

v2:
  - Add comments about minimum values for OpenCL 1.2.
---
 src/gallium/state_trackers/clover/api/device.cpp | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
b/src/gallium/state_trackers/clover/api/device.cpp
index b1f556f..b79997f 100644
--- a/src/gallium/state_trackers/clover/api/device.cpp
+++ b/src/gallium/state_trackers/clover/api/device.cpp
@@ -201,8 +201,11 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
   break;
 
case CL_DEVICE_SINGLE_FP_CONFIG:
+  // This is the "mandated minimum single precision floating-point
+  // capability" for OpenCL 1.1.  In OpenCL 1.2, CL_FP_INF_NAN
+  // is no longer required and nothing is required for custom devices.
   buf.as_scalar() =
- CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
+ CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST;
   break;
 
case CL_DEVICE_DOUBLE_FP_CONFIG:
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] radeonsi: add support for easy opcodes from ARB_gpu_shader5

2015-03-10 Thread Tom Stellard

On Tue, Mar 10, 2015 at 12:42:38PM +0100, Marek Olšák wrote:
> OK. What about patches 8 and 9?
> 

I think the intrinsics in 9 are OK, but 8 should be using LLVM IR.

-Tom

> Marek
> 
> On Thu, Mar 5, 2015 at 8:30 PM, Tom Stellard  wrote:
> > On Mon, Mar 02, 2015 at 12:54:21PM +0100, Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >
> > Hi Marek,
> >
> > After discussing with Matt, I think we should use LLVM IR rather than
> > intrinsics for IBFE and UBFE and then add patterns for them either in
> > the TableGen Files or AMDGPUISelDAGToDAG.cpp.
> >
> > Using intrinsics for BREV and POPC is fine though.
> >
> > -Tom
> >
> >> ---
> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 8 
> >>  1 file changed, 8 insertions(+)
> >>
> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> >> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> index 385d3ad..034095f 100644
> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> @@ -1293,6 +1293,8 @@ void radeon_llvm_context_init(struct 
> >> radeon_llvm_context * ctx)
> >>   bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
> >>   bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
> >>   bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> >> + bld_base->op_actions[TGSI_OPCODE_BREV].emit = 
> >> build_tgsi_intrinsic_nomem;
> >> + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = 
> >> "llvm.AMDGPU.brev";
> >>   bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
> >>   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> >> build_tgsi_intrinsic_nomem;
> >>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
> >> @@ -1326,6 +1328,8 @@ void radeon_llvm_context_init(struct 
> >> radeon_llvm_context * ctx)
> >>   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
> >>   bld_base->op_actions[TGSI_OPCODE_IABS].emit = 
> >> build_tgsi_intrinsic_nomem;
> >>   bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs.";
> >> + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = 
> >> build_tgsi_intrinsic_nomem;
> >> + bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = 
> >> "llvm.AMDGPU.bfe.i32";
> >>   bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
> >>   bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
> >>   bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
> >> @@ -1350,6 +1354,8 @@ void radeon_llvm_context_init(struct 
> >> radeon_llvm_context * ctx)
> >>   bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
> >>   bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
> >>   bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
> >> + bld_base->op_actions[TGSI_OPCODE_POPC].emit = 
> >> build_tgsi_intrinsic_nomem;
> >> + bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
> >>   bld_base->op_actions[TGSI_OPCODE_POW].emit = 
> >> build_tgsi_intrinsic_nomem;
> >>   bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
> >>   bld_base->op_actions[TGSI_OPCODE_ROUND].emit = 
> >> build_tgsi_intrinsic_nomem;
> >> @@ -1389,6 +1395,8 @@ void radeon_llvm_context_init(struct 
> >> radeon_llvm_context * ctx)
> >>   bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = 
> >> build_tgsi_intrinsic_nomem;
> >>   bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = 
> >> "llvm.AMDGPU.trunc";
> >>   bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
> >> + bld_base->op_actions[TGSI_OPCODE_UBFE].emit = 
> >> build_tgsi_intrinsic_nomem;
> >> + bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = 
> >> "llvm.AMDGPU.bfe.u32";
> >>   bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
> >>   bld_base->op_actions[TGSI_OPCODE_UMAX].emit = 
> >> build_tgsi_intrinsic_nomem;
> >>   bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = 
> >> "llvm.AMDGPU.umax";
> >> --
> >> 2.1.0
> >>
> >> ___
> >> mesa-dev mailing list
> >> mesa-dev@lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 7/9] radeonsi: add support for easy opcodes from ARB_gpu_shader5

2015-03-10 Thread Tom Stellard

 Or:
> >
> > # if bits == 0:
> > # 0
> > # else if offset + bits < 32:
> > # inv_bits = 32 - bits
> > # (value << (inv_bits - offset)) >> inv_bits
> > # else:
> > # value >> offset
> >
> > All 3 variants should use ASHR if the signed version is required.
> > Alternatively, one can use this as well:
> >
> > # if bits == 32:
> > # value
> > # else:
> > # (value >> offset) & ((1 << bits) - 1)
> >
> > And an explicit sign extension should be used for the signed version.
> >
> > The first three are so complex that I doubt it would be easy to
> > recognize them and match them precisely. They are also very unlikely
> > to appear open-coded in real applications, therefore, expanding them
> > and then matching them again seems like a huge waste of time. Apps
> > will likely just use LSHR+AND, which extracts a bitfield, but it's not
> > the same as BFE, so BFE can't be used for that. (if the second operand
> > of AND is a constant expression, then BFE can indeed be used)
> >
> > I agree that we could use LLVM IR for simple instructions like BFM and
> > BFI, but I doubt it's a good idea for complex instructions like BFE,
> > and any small optimization that changes the expression can break the
> > matching.
> >
> > Marek
> >
> >
> > On Tue, Mar 10, 2015 at 3:30 PM, Tom Stellard  wrote:
> >> On Tue, Mar 10, 2015 at 12:42:38PM +0100, Marek Olšák wrote:
> >>> OK. What about patches 8 and 9?
> >>>
> >>
> >> I think the intrinsics in 9 are OK, but 8 should be using LLVM IR.
> >>
> >> -Tom
> >>
> >>> Marek
> >>>
> >>> On Thu, Mar 5, 2015 at 8:30 PM, Tom Stellard  wrote:
> >>> > On Mon, Mar 02, 2015 at 12:54:21PM +0100, Marek Olšák wrote:
> >>> >> From: Marek Olšák 
> >>> >>
> >>> >
> >>> > Hi Marek,
> >>> >
> >>> > After discussing with Matt, I think we should use LLVM IR rather than
> >>> > intrinsics for IBFE and UBFE and then add patterns for them either in
> >>> > the TableGen Files or AMDGPUISelDAGToDAG.cpp.
> >>> >
> >>> > Using intrinsics for BREV and POPC is fine though.
> >>> >
> >>> > -Tom
> >>> >
> >>> >> ---
> >>> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 8 
> >>> >>  1 file changed, 8 insertions(+)
> >>> >>
> >>> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> >>> >> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >>> >> index 385d3ad..034095f 100644
> >>> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >>> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >>> >> @@ -1293,6 +1293,8 @@ void radeon_llvm_context_init(struct 
> >>> >> radeon_llvm_context * ctx)
> >>> >>   bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
> >>> >> + bld_base->op_actions[TGSI_OPCODE_BREV].emit = 
> >>> >> build_tgsi_intrinsic_nomem;
> >>> >> + bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = 
> >>> >> "llvm.AMDGPU.brev";
> >>> >>   bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_CEIL].emit = 
> >>> >> build_tgsi_intrinsic_nomem;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "ceil";
> >>> >> @@ -1326,6 +1328,8 @@ void radeon_llvm_context_init(struct 
> >>> >> radeon_llvm_context * ctx)
> >>> >>   bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_IABS].emit = 
> >>> >> build_tgsi_intrinsic_nomem;
> >>> >>   bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = 
> >>> >> "llvm.AMDIL.abs.";
> >>> >> + bld_base->op_actions[TGSI_OPCODE_IBFE].emit = 
> >>> >> build_tgsi_intrinsic_nomem;
> >>> >> + bld_base->op_act

Re: [Mesa-dev] [PATCH 7/9] radeonsi: add support for easy opcodes from ARB_gpu_shader5

2015-03-10 Thread Tom Stellard

On Wed, Mar 11, 2015 at 01:27:32AM +0100, Marek Olšák wrote:
> On Wed, Mar 11, 2015 at 12:09 AM, Tom Stellard  wrote:
> > On Tue, Mar 10, 2015 at 11:01:21PM +0100, Marek Olšák wrote:
> >> I've looked into how to recognize BFM and BFI and discovered that if
> >> TGSI_OPCODE_BFI is expanded, it's _impossible_ to recognize the
> >> pattern in the backend due to LLVM transformations. The reason it's
> >> impossible is that one particular simplification of the expanded IR
> >> can always be done and it always changes the IR in a way that BFI
> >> can't be recognized anymore.
> >>
> >> The ideal transformation from TGSI to ISA is (note that this is also
> >> how GLSL defines the opcode):
> >>
> >> TGSI_OPCODE_BFI(base, insert, offset, bits)
> >> = BFI(BFM(bits, offset), SHL(insert, offset), base) =
> >> s_lshl_b32 s1, s4, s6
> >> s_bfm_b32 s0, s0, s6
> >> v_mov_b32_e32 v0, s5
> >> v_mov_b32_e32 v1, s1
> >> v_bfi_b32 v0, s0, v1, v0
> >> Ideally 3 instructions if all sources are vector registers.
> >>
> >> However, if TGSI_OPCODE_BFI is expanded into basic bitwise operations
> >> (BTW the result of BFM has 2 uses in BFI), LLVM applies this
> >> transformation:
> >> (X << S) & (Y << S) -> (X & Y) << S
> >> Which breaks recognition of BFI and also the second use of BFM.
> >> Therefore this version calculates the same BFM expression twice. The
> >> first BFM is recognized by pattern matching, but the second BFM as
> >> well as BFI is unrecognizable due to the transformation. The result
> >> is:
> >> s_lshl_b32 s1, 1, s0
> >> s_bfm_b32 s0, s0, s6
> >> s_add_i32 s1, s1, -1
> >> s_not_b32 s0, s0
> >> s_and_b32 s1, s1, s4
> >> s_and_b32 s0, s0, s5
> >> s_lshl_b32 s1, s1, s6
> >> s_or_b32 s0, s1, s0
> >>
> >> There are 2 ways out of this:
> >>
> >> 1) Use BFM and BFI intrinsics in Mesa. Simple and unlikely to break in
> >> the future.
> >>
> >> 2) Try to recognize the expression tree seen by the backend. Changes
> >> in LLVM core can break it. More complicated shaders with more
> >> opportunities for transformations can break it too:
> >>
> >> def : Pat <
> >>   (i32 (or (i32 (shl (i32 (and (i32 (add (i32 (shl 1, i32:$a)),
> >> -1)), i32:$b)), i32:$c)),
> >>(i32 (and (i32 (xor (i32 (shl (i32 (add (i32 (shl 1, i32:$a)),
> >> -1)), i32:$c)), -1)), i32:$d,
> >>   (V_BFI_B32 (S_BFM_B32 $a, $c),
> >>  (S_LSHL_B32 $b, $c),
> >>  $d)
> >> >;
> >
> > I don't want to waste a lot of time discussing this, because it probably
> > doesn't matter too much in the long run.  I'm fine with using
> > intrinsics, but I just wanted to point out a few things in case you or
> > someone else wants to get this working using LLVM IR.
> >
> > 1. Running the instruction combining pass should help with pattern
> > matching.  This transforms common sequences into canonical forms which
> > make them easier to match in the backend.  We should be running this
> > pass anyway as it has some good optimizations.
> >
> >
> > diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> > b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> > index dce5b55..45c9eb8 100644
> > --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> > +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> > @@ -1444,6 +1444,7 @@ void radeon_llvm_finalize_module(struct
> > radeon_llvm_context * ctx)
> > LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
> >
> > /* Add some optimization passes */
> > +   LLVMAddInstructionCombiningPass(gallivm->passmgr);
> > LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
> > LLVMAddLICMPass(gallivm->passmgr);
> > LLVMAddAggressiveDCEPass(gallivm->passmgr);
> 
> I tried this a long time ago and it broke a few tests which used the
> kill intrinsic.
> 
> I'm testing it right now and it increases the shader binary size quite
> a lot. It looks like some DAG combines don't work with it anymore and
> the generated code looks worse overall.
> 
> I occasionally use llc for debugging, which doesn't seem to use it
> either? Anyway, it looks like there's a lot of work needed just to fix
&

[Mesa-dev] [PATCH 2/2] radeonsi: Use llvm.amdgcn.s.buffer.load instead of llvm.SI.load.const

2017-01-31 Thread Tom Stellard

Advantages of using llvm.amdgcn.s.buffer.load

- We can use a real pointer type, which LLVM can better reason about and do
  alias analysis on.  This will also ease the transition to using fat pointers
  and LLVM IR loads.

- llvm.amdgcn.s.buffer.load is defined in IntrinsicsAMDGPU.td so passes can
  query information about it other than just its attributes.
---
 src/gallium/auxiliary/gallivm/lp_bld_intr.c|  1 +
 src/gallium/auxiliary/gallivm/lp_bld_intr.h|  3 +-
 src/gallium/drivers/radeonsi/si_shader.c   | 48 +-
 src/gallium/drivers/radeonsi/si_shader_internal.h  |  8 
 .../drivers/radeonsi/si_shader_tgsi_setup.c|  6 +++
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 049671a..dc8de55 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -144,6 +144,7 @@ static const char *attr_to_str(enum lp_func_attr attr)
 {
switch (attr) {
case LP_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case LP_FUNC_ATTR_ARGMEMONLY: return "argmemonly";
case LP_FUNC_ATTR_BYVAL: return "byval";
case LP_FUNC_ATTR_INREG: return "inreg";
case LP_FUNC_ATTR_NOALIAS: return "noalias";
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index f1e075a..7c8f09b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -54,7 +54,8 @@ enum lp_func_attr {
LP_FUNC_ATTR_NOUNWIND = (1 << 4),
LP_FUNC_ATTR_READNONE = (1 << 5),
LP_FUNC_ATTR_READONLY = (1 << 6),
-   LP_FUNC_ATTR_LAST = (1 << 7)
+   LP_FUNC_ATTR_ARGMEMONLY   = (1 << 7),
+   LP_FUNC_ATTR_LAST = (1 << 8)
 };
 
 void
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index a6de7c4..cf13cb5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -93,11 +93,6 @@ static void si_build_ps_epilog_function(struct 
si_shader_context *ctx,
  */
 #define VS_EPILOG_PRIMID_LOC 2
 
-enum {
-   CONST_ADDR_SPACE = 2,
-   LOCAL_ADDR_SPACE = 3,
-};
-
 #define SENDMSG_GS 2
 #define SENDMSG_GS_DONE 3
 
@@ -360,8 +355,21 @@ static LLVMValueRef build_indexed_load_const(
struct si_shader_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index)
 {
+   LLVMTypeRef ptr_type = LLVMTypeOf(base_ptr);
+   LLVMTypeRef elem_type = LLVMGetElementType(ptr_type);
+   LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
+
+   /* Set !dereferenceable metadata */
+   if (elem_kind == LLVMPointerTypeKind ||
+   (elem_kind == LLVMArrayTypeKind && 
LLVMGetTypeKind(LLVMGetElementType(elem_type)) == LLVMPointerTypeKind)) {
+   LLVMValueRef deref_bytes, deref_md;
+   deref_bytes = LLVMConstInt(ctx->i64, UINT64_MAX, 0);
+   deref_md = LLVMMDNodeInContext(LLVMGetTypeContext(ptr_type),
+   &deref_bytes, 1);
+   LLVMSetMetadata(result, ctx->dereferenceable_md_kind, deref_md);
+   }
return result;
 }
 
@@ -1571,16 +1579,34 @@ static LLVMValueRef get_thread_id(struct 
si_shader_context *ctx)
 
 /**
  * Load a dword from a constant buffer.
+ * @param offset This is a byte offset.
+ * @returns An LLVMValueRef with f32 type.
  */
 static LLVMValueRef buffer_load_const(struct si_shader_context *ctx,
  LLVMValueRef resource,
  LLVMValueRef offset)
 {
LLVMBuilderRef builder = ctx->gallivm.builder;
-   LLVMValueRef args[2] = {resource, offset};
+   LLVMValueRef load;
+   LLVMValueRef args[3] = {resource, offset, LLVMConstInt(ctx->i1, 0, 0) };
+   LLVMTypeRef resource_type = LLVMTypeOf(resource);
+   LLVMTypeKind resource_kind = LLVMGetTypeKind(resource_type);
+
+   /* XXX: We can have a non-pointer resource if we do a constant load
+ * from the RW_BUFFERS which are still represented using the <16 x i8>
+ * type. We can eliminate this once we start using pointer types for
+* those buffers.
+*/
+   if (resource_kind != LLVMPointerTypeKind) {
+   return lp_build_intrinsic(builder, "llvm.SI.load.const",
+ ctx->f32, args, 2,
+ LP_FUNC_ATTR_READNONE);
+   }
 
-   return lp_build_intrinsic(builder, "llvm.SI.load.const", ctx->f32, 
args, 2,
-  LP_FUNC_ATTR_READNONE);
+   load = lp_build_intrinsic(builder, "llvm.amdgcn.s.buffer.load.i32",
+ ctx->i32, args, 3,

[Mesa-dev] [PATCH 1/2] radeonsi: Use build_buffer_load helper function for geometry shaders

2017-01-31 Thread Tom Stellard

Also modify build_buffer_load to always pass soffset to an intrinsic
if it is set.  This is required to avoid failing buffer range checks
in some cases.
---
 src/gallium/drivers/radeonsi/si_shader.c | 67 ++--
 1 file changed, 20 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 5c5f2e6..a6de7c4 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -890,7 +890,7 @@ static LLVMValueRef build_buffer_load(struct 
si_shader_context *ctx,
struct gallivm_state *gallivm = &ctx->gallivm;
unsigned func = CLAMP(num_channels, 1, 3) - 1;
 
-   if (HAVE_LLVM >= 0x309) {
+   if (!soffset && HAVE_LLVM >= 0x309) {
LLVMValueRef args[] = {
LLVMBuildBitCast(gallivm->builder, rsrc, ctx->v4i32, 
""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
@@ -909,11 +909,6 @@ static LLVMValueRef build_buffer_load(struct 
si_shader_context *ctx,
   "");
}
 
-   if (soffset) {
-   args[2] = LLVMBuildAdd(gallivm->builder, args[2], 
soffset,
-  "");
-   }
-
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
 type_names[func]);
 
@@ -1185,13 +1180,12 @@ static LLVMValueRef fetch_input_gs(
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
struct gallivm_state *gallivm = base->gallivm;
LLVMValueRef vtx_offset;
-   LLVMValueRef args[9];
unsigned vtx_offset_param;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
unsigned semantic_index = 
info->input_semantic_index[reg->Register.Index];
unsigned param;
-   LLVMValueRef value;
+   LLVMValueRef soffset, value;
 
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
return get_primitive_id(bld_base, swizzle);
@@ -1223,27 +1217,15 @@ static LLVMValueRef fetch_input_gs(
  4);
 
param = si_shader_io_get_unique_index(semantic_name, semantic_index);
-   args[0] = ctx->esgs_ring;
-   args[1] = vtx_offset;
-   args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
-   args[3] = uint->zero;
-   args[4] = uint->one;  /* OFFEN */
-   args[5] = uint->zero; /* IDXEN */
-   args[6] = uint->one;  /* GLC */
-   args[7] = uint->zero; /* SLC */
-   args[8] = uint->zero; /* TFE */
-
-   value = lp_build_intrinsic(gallivm->builder,
-  "llvm.SI.buffer.load.dword.i32.i32",
-  ctx->i32, args, 9,
-  LP_FUNC_ATTR_READONLY);
+   soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle) * 256);
+
+   value = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+ vtx_offset, soffset, 0, 1, 0);
if (tgsi_type_is_64bit(type)) {
LLVMValueRef value2;
-   args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 
1) * 256);
-   value2 = lp_build_intrinsic(gallivm->builder,
-   "llvm.SI.buffer.load.dword.i32.i32",
-   ctx->i32, args, 9,
-   LP_FUNC_ATTR_READONLY);
+   soffset = lp_build_const_int32(gallivm, (param * 4 + swizzle + 
1) * 256);
+   value2 = build_buffer_load(ctx, ctx->esgs_ring, 1, NULL,
+  vtx_offset, soffset, 0, 1, 0);
return si_llvm_emit_fetch_64bit(bld_base, type,
value, value2);
}
@@ -6476,7 +6458,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs_selector->info;
-   LLVMValueRef args[9];
+   LLVMValueRef voffset;
int i, r;
 
outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
@@ -6503,18 +6485,6 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
create_function(&ctx);
preload_ring_buffers(&ctx);
 
-   args[0] = ctx.gsvs_ring[0];
-   args[1] = lp_build_mul_imm(uint,
-  LLVMGetParam(ctx.main_fn,
-   ctx.param_vertex_id),
-  4);
-   args[3] = uint->zero;
-   args[4] = uint->one;  /* OFFEN */
-   args[5] = uint->zero; /* IDXEN */
-   args[6] = uint->one;  /* GLC */
-   args[7] = uint->one;  /* SLC */
-

[Mesa-dev] [PATCH] radeonsi: Fix build on LLVM < 3.9

2017-01-31 Thread Tom Stellard

This was broken by: e0cc0a614c96011958bc3a1b84da9168e0e1ccbb
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..897faae 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1257,7 +1257,11 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 {
struct lp_type type;
LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
+   char *data_layout_str = NULL;
+
+   if (HAVE_LLVM >= 0x0309) {
+   data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
+   }
 
/* Initialize the gallivm object:
 * We are only using the module, context, and builder fields of this 
struct.
@@ -1275,9 +1279,11 @@ void si_llvm_context_init(struct si_shader_context *ctx,
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
 
-   LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
-   LLVMDisposeTargetData(data_layout);
-   LLVMDisposeMessage(data_layout_str);
+   if (data_layout_str) {
+   LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
+   LLVMDisposeTargetData(data_layout);
+   LLVMDisposeMessage(data_layout_str);
+   }
 
bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: Fix build on LLVM < 3.9 v2

2017-01-31 Thread Tom Stellard

This was broken by: e0cc0a614c96011958bc3a1b84da9168e0e1ccbb

v2:
  - Use preprocessor macro
---
 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index 205686a..c7445e0 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1256,8 +1256,6 @@ void si_llvm_context_init(struct si_shader_context *ctx,
  const struct tgsi_token *tokens)
 {
struct lp_type type;
-   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
 
/* Initialize the gallivm object:
 * We are only using the module, context, and builder fields of this 
struct.
@@ -1275,9 +1273,13 @@ void si_llvm_context_init(struct si_shader_context *ctx,
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
 
+#if HAVE_LLVM >= 0x0309
+   LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+   char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
LLVMDisposeTargetData(data_layout);
LLVMDisposeMessage(data_layout_str);
+#endif
 
bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radv/ac: enable loop unrolling.

2017-02-27 Thread Tom Stellard

On Fri, Feb 24, 2017 at 03:30:50PM -0800, Matt Arsenault wrote:
> 
> > On Feb 24, 2017, at 14:39, Marek Olšák  wrote:
> > 
> > On Fri, Feb 24, 2017 at 7:20 PM, Matt Arsenault  wrote:
> >> 
> >> On Feb 24, 2017, at 01:45, Marek Olšák  wrote:
> >> 
> >> The main requirement is that if there is indirect indexing inside a
> >> loop, we always want to unroll the whole loop to get rid of the
> >> indexing, which can decrease scratch usage.
> >> 
> >> Marek
> >> 
> >> We boost the unroll thresholds when there is private memory indexed by the
> >> induction variable. See AMDGPUTTIImpl::getUnrollingPreferences
> > 
> > When Samuel Pitoiset was experimenting with the same code as this
> > patch but for radeonsi, getUnrollingPreferences wasn't even getting
> > called when unrolling. I guess he eventually gave up or didn't see any
> > positive effect from it.
> > 
> > Marek
> 
> Then there’s a bug somewhere. It should be getting called

It's possible TargetTransformInfo isn't being set up correctly by the
mesa pass pipeline.

-Tom

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/18] gallium/radeon: re-enable unsafe math for graphics shaders

2015-07-28 Thread Tom Stellard

On Tue, Jul 28, 2015 at 12:05:42PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 
> This reverts commit 4db985a5fa9ea985616a726b1770727309502d81.
> 
> The grass no longer disappears, which was the reason the commit was reverted.
> This might affect tessellation. We'll see.
> 

Hi Marek,

This patch is: Reviewed-by: Tom Stellard 

Setting the fast-math flags on individual floating-point instructions
may have an even bigger impact, because this function attribute is only
used in a few places.

Here is documentation for the fast-math flags:
http://llvm.org/docs/LangRef.html#fast-math-flags

Currently the only way to set the fast-math flags on instructions is
with the C++ API.  Something like this should work:

LLVMValueRef Inst;
llvm::Instruction *I = llvm::unwrap<llvm::Instruction>(Inst);

// Construct fast-math flags;
llvm::FastMathFlags Flags;
Flags.setUnsafeAlgebra();
// Apply other flags as needed

I->setFastMathFlags(Flags);

I'm not sure which fast-math flags the spec allows, so we would need to
figure that out.

-Tom

> Totals from affected shaders:
> SGPRS: 151672 -> 150232 (-0.95 %)
> VGPRS: 90620 -> 89776 (-0.93 %)
> Code Size: 3980472 -> 3920836 (-1.50 %) bytes
> LDS: 67 -> 67 (0.00 %) blocks
> Scratch: 1357824 -> 1202176 (-11.46 %) bytes per wave
> ---
>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
> b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> index 973d6ed..c442c65 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> @@ -82,6 +82,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
>   sprintf(Str, "%1d", llvm_type);
>  
>   LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
> +
> + if (type != TGSI_PROCESSOR_COMPUTE) {
> + LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
> + }
>  }
>  
>  static void init_r600_target()
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 05/18] gallium/radeon: move r600-specific code to r600g

2015-07-28 Thread Tom Stellard

On Tue, Jul 28, 2015 at 12:05:40PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 
Reviewed-by: Tom Stellard 

> ---
>  src/gallium/drivers/r600/r600_llvm.c   | 152 
> -
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 150 
>  2 files changed, 150 insertions(+), 152 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_llvm.c 
> b/src/gallium/drivers/r600/r600_llvm.c
> index f865549..9cd4357 100644
> --- a/src/gallium/drivers/r600/r600_llvm.c
> +++ b/src/gallium/drivers/r600/r600_llvm.c
> @@ -754,7 +754,131 @@ static struct lp_build_tgsi_action dot_action = {
>   .intr_name = "llvm.AMDGPU.dp4"
>  };
>  
> +static void txd_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + const struct tgsi_full_instruction * inst = emit_data->inst;
> +
> + LLVMValueRef coords[4];
> + unsigned chan, src;
> + for (src = 0; src < 3; src++) {
> + for (chan = 0; chan < 4; chan++)
> + coords[chan] = lp_build_emit_fetch(bld_base, inst, src, 
> chan);
> +
> + emit_data->args[src] = 
> lp_build_gather_values(bld_base->base.gallivm,
> + coords, 4);
> + }
> + emit_data->arg_count = 3;
> + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
> +}
> +
> +
> +static void txp_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + const struct tgsi_full_instruction * inst = emit_data->inst;
> + LLVMValueRef src_w;
> + unsigned chan;
> + LLVMValueRef coords[5];
> +
> + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
> + src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
> +
> + for (chan = 0; chan < 3; chan++ ) {
> + LLVMValueRef arg = lp_build_emit_fetch(bld_base,
> + emit_data->inst, 0, chan);
> + coords[chan] = lp_build_emit_llvm_binary(bld_base,
> + TGSI_OPCODE_DIV, arg, src_w);
> + }
> + coords[3] = bld_base->base.one;
> +
> + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
> +  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
> +  inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
> +  inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
> + inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
> + inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
> + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 
> coords, NULL);
> + }
>  
> + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm,
> + coords, 4);
> + emit_data->arg_count = 1;
> +}
> +
> +static void tex_fetch_args(
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + const struct tgsi_full_instruction * inst = emit_data->inst;
> +
> + LLVMValueRef coords[5];
> + unsigned chan;
> + for (chan = 0; chan < 4; chan++) {
> + coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
> + }
> +
> + if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
> + inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
> + inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
> + /* These instructions have additional operand that should be 
> packed
> +  * into the cube coord vector by 
> radeon_llvm_emit_prepare_cube_coords.
> +  * That operand should be passed as a float value in the args 
> array
> +  * right after the coord vector. After packing it's not used 
> anymore,
> +  * that's why arg_count is not increased */
> + coords[4] = lp_build_emit_fetch(bld_base, inst, 1, 0);
> + }
> +
> + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
> +  inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY ||
> +  inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
> +  inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) &&
> + inst->Instruction.Opcode != TGSI_OPCODE_TXQ &&
> + inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) {
> + radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, 
> coords, NULL);
> + }
> +
> + emit_data->arg_count = 1;
> + e

Re: [Mesa-dev] [PATCH 1/3] gallivm: Don't use raw_debug_ostream for dissasembling

2015-07-29 Thread Tom Stellard

On Wed, Jul 29, 2015 at 09:54:05AM +0100, Jose Fonseca wrote:
> On 23/07/15 17:06, Jose Fonseca wrote:
> > On 20/07/15 21:39, Jose Fonseca wrote:
> >> On 20/07/15 18:35, Tom Stellard wrote:
> >>> All LLVM API calls that require an ostream object have been removed from
> >>> the disassemble() function, so we don't need to use this class to wrap
> >>> _debug_printf() we can just call this function directly.
> >>> ---
> >>>   src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 27
> >>> +-
> >>>   1 file changed, 13 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> >>> b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> >>> index 405e648..ec88f33 100644
> >>> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> >>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> >>> @@ -123,7 +123,7 @@ lp_debug_dump_value(LLVMValueRef value)
> >>>* - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
> >>>*/
> >>>   static size_t
> >>> -disassemble(const void* func, llvm::raw_ostream & Out)
> >>> +disassemble(const void* func)
> >>>   {
> >>>  const uint8_t *bytes = (const uint8_t *)func;
> >>>
> >>> @@ -141,7 +141,8 @@ disassemble(const void* func, llvm::raw_ostream &
> >>> Out)
> >>>  char outline[1024];
> >>>
> >>>  if (!D) {
> >>> -  Out << "error: couldn't create disassembler for triple " <<
> >>> Triple << "\n";
> >>> +  _debug_printf("error: couldn't create disassembler for triple
> >>> %s\n",
> >>> +Triple.c_str());
> >>> return 0;
> >>>  }
> >>>
> >>> @@ -155,13 +156,13 @@ disassemble(const void* func, llvm::raw_ostream
> >>> & Out)
> >>>  * so that between runs.
> >>>  */
> >>>
> >>> -  Out << llvm::format("%6lu:\t", (unsigned long)pc);
> >>> +  _debug_printf("%6lu:\t", (unsigned long)pc);
> >>>
> >>> Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent
> >>> - pc, 0, outline,
> >>>  sizeof outline);
> >>>
> >>> if (!Size) {
> >>> - Out << "invalid\n";
> >>> + _debug_printf("invalid\n");
> >>>pc += 1;
> >>>break;
> >>> }
> >>> @@ -173,10 +174,10 @@ disassemble(const void* func, llvm::raw_ostream
> >>> & Out)
> >>> if (0) {
> >>>unsigned i;
> >>>for (i = 0; i < Size; ++i) {
> >>> -Out << llvm::format("%02x ", bytes[pc + i]);
> >>> +_debug_printf("%02x ", bytes[pc + i]);
> >>>}
> >>>for (; i < 16; ++i) {
> >>> -Out << "   ";
> >>> +_debug_printf("   ");
> >>>}
> >>> }
> >>>
> >>> @@ -184,9 +185,9 @@ disassemble(const void* func, llvm::raw_ostream &
> >>> Out)
> >>>  * Print the instruction.
> >>>  */
> >>>
> >>> -  Out << outline;
> >>> +  _debug_printf("%*s", Size, outline);
> >>>
> >>> -  Out << "\n";
> >>> +  _debug_printf("\n");
> >>>
> >>> /*
> >>>  * Stop disassembling on return statements, if there is no
> >>> record of a
> >>> @@ -206,13 +207,12 @@ disassemble(const void* func, llvm::raw_ostream
> >>> & Out)
> >>> pc += Size;
> >>>
> >>> if (pc >= extent) {
> >>> - Out << "disassembly larger than " << extent << "bytes,
> >>> aborting\n";
> >>> + _debug_printf("disassembly larger than %ull bytes,
> >>> aborting\n", extent);
> >>>break;
> >>> }
> >>>  }
> >>>
> >>> -   Out << &quo

Re: [Mesa-dev] [PATCH 08/18] radeonsi: don't use llvm.AMDIL.fraction for FRC and DFRAC

2015-07-31 Thread Tom Stellard

On Tue, Jul 28, 2015 at 12:05:43PM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 
> There are 2 reasons for this:
> - LLVM optimization passes can work with floor
> - there are patterns to select v_fract from floor anyway
> 
> There is no change in the generated code.
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 20 
>  1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 319380f..5c08cf5 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1170,6 +1170,20 @@ static void emit_dneg(
>   emit_data->args[0], "");
>  }
>  
> +static void emit_frac(
> + const struct lp_build_tgsi_action * action,
> + struct lp_build_tgsi_context * bld_base,
> + struct lp_build_emit_data * emit_data)
> +{
> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> +
> + LLVMValueRef floor = lp_build_intrinsic(builder, "floor", 
> emit_data->dst_type,

The intrinsics name should be "llvm.floor.f32" for float and "llvm.floor.f64"
for double.

With that fixed, this is:
Reviewed-by: Tom Stellard 

> + &emit_data->args[0], 1,
> + LLVMReadNoneAttribute);
> + emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
> + emit_data->args[0], floor, "");
> +}
> +
>  static void emit_f2i(
>   const struct lp_build_tgsi_action * action,
>   struct lp_build_tgsi_context * bld_base,
> @@ -1432,8 +1446,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "fabs";
>   bld_base->op_actions[TGSI_OPCODE_DFMA].emit = 
> build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
> - bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = 
> build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_DFRAC].intr_name = 
> "llvm.AMDIL.fraction.";
> + bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
>   bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
>   bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
>   bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
> @@ -1452,8 +1465,7 @@ void radeon_llvm_context_init(struct 
> radeon_llvm_context * ctx)
>   bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "floor";
>   bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
>   bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
> - bld_base->op_actions[TGSI_OPCODE_FRC].emit = build_tgsi_intrinsic_nomem;
> - bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = 
> "llvm.AMDIL.fraction.";
> + bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
>   bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
>   bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
>   bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
> -- 
> 2.1.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 08/18] radeonsi: don't use llvm.AMDIL.fraction for FRC and DFRAC

2015-07-31 Thread Tom Stellard

On Fri, Jul 31, 2015 at 04:59:19PM +0200, Marek Olšák wrote:
> On Fri, Jul 31, 2015 at 4:18 PM, Tom Stellard  wrote:
> > On Tue, Jul 28, 2015 at 12:05:43PM +0200, Marek Olšák wrote:
> >> From: Marek Olšák 
> >>
> >> There are 2 reasons for this:
> >> - LLVM optimization passes can work with floor
> >> - there are patterns to select v_fract from floor anyway
> >>
> >> There is no change in the generated code.
> >> ---
> >>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 20 
> >> 
> >>  1 file changed, 16 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> >> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> index 319380f..5c08cf5 100644
> >> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> >> @@ -1170,6 +1170,20 @@ static void emit_dneg(
> >>   emit_data->args[0], "");
> >>  }
> >>
> >> +static void emit_frac(
> >> + const struct lp_build_tgsi_action * action,
> >> + struct lp_build_tgsi_context * bld_base,
> >> + struct lp_build_emit_data * emit_data)
> >> +{
> >> + LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> >> +
> >> + LLVMValueRef floor = lp_build_intrinsic(builder, "floor", 
> >> emit_data->dst_type,
> >
> > The intrinsics name should be "llvm.floor.f32" for float and 
> > "llvm.floor.f64"
> > for double.
> >
> > With that fixed, this is:
> > Reviewed-by: Tom Stellard 
> 
> Sorry, I have pushed this already. Is it really required to use
> "llvm.floor.f*"? We've been using "floor" for FLR forever. We've also
> been using "fabs" and "ceil". Are those wrong too?
> 

It is better to use the intrinsic functions (i.e. llvm.*), because
they don't have side-effects like the libm calls, so they can be optimized
better.

-Tom

> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] gallium/radeon: always use the llvm. prefix in intrinsic names

2015-08-04 Thread Tom Stellard

On Mon, Aug 03, 2015 at 05:29:47PM +0900, Michel Dänzer wrote:
> On 01.08.2015 00:35, Marek Olšák wrote:
> > From: Marek Olšák 
> 
> [...]
> 
> > @@ -1176,8 +1176,18 @@ static void emit_frac(
> > struct lp_build_emit_data * emit_data)
> >  {
> > LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> > -
> > -   LLVMValueRef floor = lp_build_intrinsic(builder, "floor", 
> > emit_data->dst_type,
> > +char *intr;
> > +
> > +if (emit_data->info->opcode == TGSI_OPCODE_FRC)
> > +intr = "llvm.floor.f32";
> > +else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
> > +intr = "llvm.floor.f64";
> > +else {
> > +assert(0);
> > +return;
> > +}
> 
> The code you're adding here uses spaces for indentation, whereas the
> surrounding code seems to use tabs. With that fixed,
> 
> Acked-by: Michel Dänzer 
> 

Reviewed-by: Tom Stellard 

> but Tom should take a look as well.
> 
> 
> -- 
> Earthling Michel Dänzer   |   http://www.amd.com
> Libre software enthusiast | Mesa and X developer
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] clover: clEnqueue* should block when asked for

2015-08-04 Thread Tom Stellard

On Mon, Aug 03, 2015 at 12:53:32PM +0300, Francisco Jerez wrote:
> EdB  writes:
> 
> > As a side effect, this fix clRetain/ReleaseEvent Piglit test
> > ---
> >  src/gallium/state_trackers/clover/api/transfer.cpp | 29 
> > --
> >  1 file changed, 27 insertions(+), 2 deletions(-)
> >
> > diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp 
> > b/src/gallium/state_trackers/clover/api/transfer.cpp
> > index fdb9405..c2f4f13 100644
> > --- a/src/gallium/state_trackers/clover/api/transfer.cpp
> > +++ b/src/gallium/state_trackers/clover/api/transfer.cpp
> > @@ -295,6 +295,9 @@ clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, 
> > cl_bool blocking,
> > &mem, obj_origin, obj_pitch,
> > region));
> >  
> > +   if (blocking)
> > +  hev().wait();
> > +
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> >  
> > @@ -325,6 +328,9 @@ clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem 
> > d_mem, cl_bool blocking,
> > ptr, {}, obj_pitch,
> > region));
> >  
> > +   if (blocking)
> > +  hev().wait();
> > +
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> >  
> > @@ -362,6 +368,9 @@ clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem 
> > d_mem, cl_bool blocking,
> > &mem, obj_origin, obj_pitch,
> > region));
> >  
> > +   if (blocking)
> > +  hev().wait();
> > +
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> >  
> > @@ -398,6 +407,8 @@ clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem 
> > d_mem, cl_bool blocking,
> >soft_copy_op(q, &mem, obj_origin, obj_pitch,
> > ptr, host_origin, host_pitch,
> > region));
> > +   if (blocking)
> > +  hev().wait();
> >  
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> > @@ -504,6 +515,9 @@ clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, 
> > cl_bool blocking,
> > &img, src_origin, src_pitch,
> > region));
> >  
> > +   if (blocking)
> > +  hev().wait();
> > +
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> >  
> > @@ -537,6 +551,8 @@ clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, 
> > cl_bool blocking,
> >soft_copy_op(q, &img, dst_origin, dst_pitch,
> > ptr, {}, src_pitch,
> > region));
> > +   if (blocking)
> > +  hev().wait();
> >  
> > ret_object(rd_ev, hev);
> > return CL_SUCCESS;
> > @@ -666,8 +682,12 @@ clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, 
> > cl_bool blocking,
> > validate_map_flags(mem, flags);
> >  
> > void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, 
> > region);
> > +   auto hev = create(q, CL_COMMAND_MAP_BUFFER, deps);
> > +
> > +   if (blocking)
> > +  hev().wait();
> >  
> > -   ret_object(rd_ev, create(q, CL_COMMAND_MAP_BUFFER, deps));
> > +   ret_object(rd_ev, hev);
> > ret_error(r_errcode, CL_SUCCESS);
> > return map;
> >  
> > @@ -695,7 +715,12 @@ clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, 
> > cl_bool blocking,
> >  
> > void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
> >  
> > -   ret_object(rd_ev, create(q, CL_COMMAND_MAP_IMAGE, deps));
> > +   auto hev = create(q, CL_COMMAND_MAP_IMAGE, deps);
> > +
> > +   if (blocking)
> > +  hev().wait();
> > +
> > +   ret_object(rd_ev, hev);
> > ret_error(r_errcode, CL_SUCCESS);
> > return map;
> >  
> > -- 
> > 2.1.0
> 
> This has come up several times already, and the naive fix has a number
> of problems -- Last time [1] I proposed an alternative solution to avoid
> them, not sure if Grigori is still planning to look into it.
> 

I think someone should add a comment to these functions, so we don't
keep getting this same patch.

-Tom

> [1] http://lists.freedesktop.org/archives/mesa-dev/2015-June/086110.html




> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] clover: Properly initialize LLVM targets when linking with component libs

2015-08-07 Thread Tom Stellard

Calls to LLVMInitialize* fail when we are linking against individual
component libraries rather than one large shared object, because
we only include component libraries that are required by the drivers.

We need to make sure to only initialize the targets that we need.

CC: 10.6 
---
 configure.ac  |  4 
 src/gallium/state_trackers/clover/Makefile.am |  3 ++-
 src/gallium/state_trackers/clover/llvm/invocation.cpp | 17 +
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/configure.ac b/configure.ac
index 36197d3..e1a7d7a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2040,8 +2040,10 @@ require_egl_drm() {
 radeon_llvm_check() {
 if test ${LLVM_VERSION_INT} -lt 307; then
 amdgpu_llvm_target_name='r600'
+   CLOVER_CPP_FLAGS="${CLOVER_CPP_FLAGS} -DCLOVER_INIT_R600_TARGET"
 else
 amdgpu_llvm_target_name='amdgpu'
+   CLOVER_CPP_FLAGS="${CLOVER_CPP_FLAGS} -DCLOVER_INIT_AMDGPU_TARGET"
 fi
 if test "x$enable_gallium_llvm" != "xyes"; then
 AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
@@ -2285,6 +2287,8 @@ AC_SUBST([XA_MINOR], $XA_MINOR)
 AC_SUBST([XA_TINY], $XA_TINY)
 AC_SUBST([XA_VERSION], "$XA_MAJOR.$XA_MINOR.$XA_TINY")
 
+AC_SUBST([CLOVER_CPP_FLAGS], $CLOVER_CPP_FLAGS)
+
 dnl Restore LDFLAGS and CPPFLAGS
 LDFLAGS="$_SAVE_LDFLAGS"
 CPPFLAGS="$_SAVE_CPPFLAGS"
diff --git a/src/gallium/state_trackers/clover/Makefile.am 
b/src/gallium/state_trackers/clover/Makefile.am
index fd0ccf8..975b36f 100644
--- a/src/gallium/state_trackers/clover/Makefile.am
+++ b/src/gallium/state_trackers/clover/Makefile.am
@@ -45,7 +45,8 @@ libclllvm_la_CXXFLAGS = \
$(DEFINES) \
-DLIBCLC_INCLUDEDIR=\"$(LIBCLC_INCLUDEDIR)/\" \
-DLIBCLC_LIBEXECDIR=\"$(LIBCLC_LIBEXECDIR)/\" \
-   -DCLANG_RESOURCE_DIR=\"$(CLANG_RESOURCE_DIR)\"
+   -DCLANG_RESOURCE_DIR=\"$(CLANG_RESOURCE_DIR)\" \
+   $(CLOVER_CPP_FLAGS)
 
 libclllvm_la_SOURCES = $(LLVM_SOURCES)
 
diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 86859af..361a149 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -786,10 +786,19 @@ namespace {
init_targets() {
   static bool targets_initialized = false;
   if (!targets_initialized) {
- LLVMInitializeAllTargets();
- LLVMInitializeAllTargetInfos();
- LLVMInitializeAllTargetMCs();
- LLVMInitializeAllAsmPrinters();
+#ifdef CLOVER_INIT_AMDGPU_TARGET
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTargetMC();
+ LLVMInitializeAMDGPUAsmPrinter();
+#endif
+
+#ifdef CLOVER_INIT_R600_TARGET
+ LLVMInitializeR600Target();
+ LLVMInitializeR600TargetInfo();
+ LLVMInitializeR600TargetMC();
+ LLVMInitializeR600AsmPrinter();
+#endif
  targets_initialized = true;
   }
}
-- 
2.0.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radeonsi/compute: Add some more debug printfs

2016-09-13 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_compute.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 5041761..a79c224 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -298,6 +298,9 @@ static bool si_switch_compute_shader(struct si_context 
*sctx,
radeon_emit(cs, config->rsrc1);
radeon_emit(cs, config->rsrc2);
 
+   COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
+   "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
+
radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
  S_00B860_WAVES(sctx->scratch_waves)
 | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radeonsi/compute: Use the HSA abi for non-TGSI compute shaders v2

2016-09-13 Thread Tom Stellard

This patch switches non-TGSI compute shaders over to using the HSA
ABI described here:

https://github.com/RadeonOpenCompute/ROCm-Docs/blob/master/AMDGPU-ABI.md

The HSA ABI provides a much cleaner interface for compute shaders and allows
us to share more code in the compiler with the HSA stack.

The main changes in this patch are:
  - We now pass the scratch buffer resource into the shader via user sgprs
rather than using relocations.
  - Grid/Block sizes are now passed to the shader via the dispatch packet
rather than at the beginning of the kernel arguments.

Typically for HSA, the CP firmware will create the dispatch packet and set
up the user sgprs automatically.  However, in Mesa we let the driver do
this work.  The main reason for this is that I haven't researched how to
get the CP to do all these things, and I'm not sure if it is supported
for all GPUs.

v2:
  - Add comments explaining why we are setting certain bits of the scratch
resource descriptor.
---
 src/gallium/drivers/radeon/r600_pipe_common.c|   6 +-
 src/gallium/drivers/radeonsi/amd_kernel_code_t.h | 534 +++
 src/gallium/drivers/radeonsi/si_compute.c| 236 +-
 3 files changed, 758 insertions(+), 18 deletions(-)
 create mode 100644 src/gallium/drivers/radeonsi/amd_kernel_code_t.h

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 6d7cc1b..8f17f36 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -822,7 +822,11 @@ static int r600_get_compute_param(struct pipe_screen 
*screen,
if (rscreen->family <= CHIP_ARUBA) {
triple = "r600--";
} else {
-   triple = "amdgcn--";
+   if (HAVE_LLVM < 0x0400) {
+   triple = "amdgcn--";
+   } else {
+   triple = "amdgcn--mesa3d";
+   }
}
switch(rscreen->family) {
/* Clang < 3.6 is missing Hainan in its list of
diff --git a/src/gallium/drivers/radeonsi/amd_kernel_code_t.h 
b/src/gallium/drivers/radeonsi/amd_kernel_code_t.h
new file mode 100644
index 000..d0d7809
--- /dev/null
+++ b/src/gallium/drivers/radeonsi/amd_kernel_code_t.h
@@ -0,0 +1,534 @@
+/*
+ * Copyright 2015,2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef AMDKERNELCODET_H
+#define AMDKERNELCODET_H
+
+//---//
+// AMD Kernel Code, and its dependencies //
+//---//
+
+// Sets val bits for specified mask in specified dst packed instance.
+#define AMD_HSA_BITS_SET(dst, mask, val)   
\
+  dst &= (~(1 << mask ## _SHIFT) & ~mask); 
\
+  dst |= (((val) << mask ## _SHIFT) & mask)
+
+// Gets bits for specified mask from specified src packed instance.
+#define AMD_HSA_BITS_GET(src, mask)
\
+  ((src & mask) >> mask ## _SHIFT) 
\
+
+/* Every amd_*_code_t has the following properties, which are composed of
+ * a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
+ * bit width (AMD_CODE_PROPERTY_*_WIDTH, and bit shift amount
+ * (AMD_CODE_PROPERTY_*_SHIFT) for convenient access. Unused bits must be 0.
+ *
+ * (Note that bit fields cannot be used as their layout is
+ * implementation defined in the C standard and so cannot be used to
+ * specify an ABI)
+ */
+enum amd_code_property_mask_t {
+
+  /* Enable the setup of the SGPR user data registers
+   * (AMD_CODE_PROPERTY_ENAB

[Mesa-dev] [PATCH 2/3] radeonsi: Add function for converting LLVM type to intrinsic string

2016-10-11 Thread Tom Stellard

The existing function only worked for integer types.
---
 src/gallium/drivers/radeonsi/si_shader.c | 42 
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8254cb2..4e07317 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3347,17 +3347,39 @@ static LLVMValueRef get_buffer_size(
  * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
  * intrinsic names).
  */
-static void build_int_type_name(
+static void build_type_name_for_intr(
LLVMTypeRef type,
char *buf, unsigned bufsize)
 {
-   assert(bufsize >= 6);
+   LLVMTypeRef elem_type = type;
 
-   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
-   snprintf(buf, bufsize, "v%ui32",
-LLVMGetVectorSize(type));
-   else
-   strcpy(buf, "i32");
+   assert(bufsize >= 8);
+
+   if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+   int ret = snprintf(buf, bufsize, "v%u",
+   LLVMGetVectorSize(type));
+   if (ret < 0) {
+   char *type_name = LLVMPrintTypeToString(type);
+   fprintf(stderr, "Error building type name for: %s\n",
+   type_name);
+   return;
+   }
+   elem_type = LLVMGetElementType(type);
+   buf += ret;
+   bufsize -= ret;
+   }
+   switch (LLVMGetTypeKind(elem_type)) {
+   default: break;
+   case LLVMIntegerTypeKind:
+   snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+   break;
+   case LLVMFloatTypeKind:
+   snprintf(buf, bufsize, "f32");
+   break;
+   case LLVMDoubleTypeKind:
+   snprintf(buf, bufsize, "f64");
+   break;
+   }
 }
 
 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
@@ -3744,7 +3766,7 @@ static void get_image_intr_name(const char *base_name,
 {
char coords_type_name[8];
 
-   build_int_type_name(coords_type, coords_type_name,
+   build_type_name_for_intr(coords_type, coords_type_name,
sizeof(coords_type_name));
 
snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
@@ -4144,7 +4166,7 @@ static void atomic_emit(
} else {
char coords_type[8];
 
-   build_int_type_name(LLVMTypeOf(emit_data->args[1]),
+   build_type_name_for_intr(LLVMTypeOf(emit_data->args[1]),
coords_type, sizeof(coords_type));
snprintf(intrinsic_name, sizeof(intrinsic_name),
 "llvm.amdgcn.image.atomic.%s.%s",
@@ -4918,7 +4940,7 @@ static void build_tex_intrinsic(const struct 
lp_build_tgsi_action *action,
}
 
/* Add the type and suffixes .c, .o if needed. */
-   build_int_type_name(LLVMTypeOf(emit_data->args[0]), type, sizeof(type));
+   build_type_name_for_intr(LLVMTypeOf(emit_data->args[0]), type, 
sizeof(type));
sprintf(intr_name, "%s%s%s%s.%s",
name, is_shadow ? ".c" : "", infix,
has_offset ? ".o" : "", type);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 3/3] radeonsi: Use the new image load/store intrinsic signatures

2016-10-11 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_shader.c | 59 +---
 1 file changed, 46 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4e07317..1f1fdf2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3575,16 +3575,29 @@ static void image_append_args(
const struct tgsi_full_instruction *inst = emit_data->inst;
LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
-
-   emit_data->args[emit_data->arg_count++] = i1false; /* r128 */
-   emit_data->args[emit_data->arg_count++] =
-   tgsi_is_array_image(target) ? i1true : i1false; /* da */
-   if (!atomic) {
-   emit_data->args[emit_data->arg_count++] =
-   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | 
TGSI_MEMORY_VOLATILE) ?
-   i1true : i1false; /* glc */
+   LLVMValueRef r128 = i1false;
+   LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
+   LLVMValueRef glc =
+   inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | 
TGSI_MEMORY_VOLATILE) ?
+   i1true : i1false;
+   LLVMValueRef slc = i1false;
+   LLVMValueRef lwe = i1false;
+
+   if (atomic || (HAVE_LLVM <= 0x0309)) {
+   emit_data->args[emit_data->arg_count++] = r128;
+   emit_data->args[emit_data->arg_count++] = da;
+   if (!atomic) {
+   emit_data->args[emit_data->arg_count++] = glc;
+   }
+   emit_data->args[emit_data->arg_count++] = slc;
+   return;
}
-   emit_data->args[emit_data->arg_count++] = i1false; /* slc */
+
+   /* HAVE_LLVM >= 0x0400 */
+   emit_data->args[emit_data->arg_count++] = glc;
+   emit_data->args[emit_data->arg_count++] = slc;
+   emit_data->args[emit_data->arg_count++] = lwe;
+   emit_data->args[emit_data->arg_count++] = da;
 }
 
 /**
@@ -3761,7 +3774,9 @@ static void load_emit_memory(
 }
 
 static void get_image_intr_name(const char *base_name,
+   LLVMTypeRef data_type,
LLVMTypeRef coords_type,
+   LLVMTypeRef rsrc_type,
char *out_name, unsigned out_len)
 {
char coords_type_name[8];
@@ -3769,7 +3784,21 @@ static void get_image_intr_name(const char *base_name,
build_type_name_for_intr(coords_type, coords_type_name,
sizeof(coords_type_name));
 
+#if HAVE_LLVM <= 0x0309
snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
+#else
+   {
+   char data_type_name[8];
+   char rsrc_type_name[8];
+
+   build_type_name_for_intr(data_type, data_type_name,
+   sizeof(data_type_name));
+   build_type_name_for_intr(rsrc_type, rsrc_type_name,
+   sizeof(rsrc_type_name));
+   snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
+data_type_name, coords_type_name, rsrc_type_name);
+   }
+#endif
 }
 
 static void load_emit(
@@ -3781,7 +3810,7 @@ static void load_emit(
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
-   char intrinsic_name[32];
+   char intrinsic_name[64];
 
if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
load_emit_memory(ctx, emit_data);
@@ -3804,7 +3833,9 @@ static void load_emit(
LLVMReadOnlyAttribute);
} else {
get_image_intr_name("llvm.amdgcn.image.load",
-   LLVMTypeOf(emit_data->args[0]),
+   emit_data->dst_type,/* vdata */
+   LLVMTypeOf(emit_data->args[0]), /* coords */
+   LLVMTypeOf(emit_data->args[1]), /* rsrc */
intrinsic_name, sizeof(intrinsic_name));
 
emit_data->output[emit_data->chan] =
@@ -3981,7 +4012,7 @@ static void store_emit(
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
-   char intrinsic_name[32];
+   char intrinsic_name[64];
 
if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
store_emit_memory(ctx, emit_data);
@@ -4003,7 +4034,9 @@ static void store_emit(
emit_data->arg_count, 0);
} else {
get_image_intr_name("llvm.amdgcn.image.store",
-   LLVMTypeOf(emit_data->args[1]),
+

[Mesa-dev] [PATCH 1/3] radeonsi: Refactor image store/load intrinsic name creation

2016-10-11 Thread Tom Stellard

---
 src/gallium/drivers/radeonsi/si_shader.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 49d4121..8254cb2 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3738,6 +3738,18 @@ static void load_emit_memory(
emit_data->output[emit_data->chan] = lp_build_gather_values(gallivm, 
channels, 4);
 }
 
+static void get_image_intr_name(const char *base_name,
+   LLVMTypeRef coords_type,
+   char *out_name, unsigned out_len)
+{
+   char coords_type_name[8];
+
+   build_int_type_name(coords_type, coords_type_name,
+   sizeof(coords_type_name));
+
+   snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
+}
+
 static void load_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
@@ -3748,7 +3760,6 @@ static void load_emit(
LLVMBuilderRef builder = gallivm->builder;
const struct tgsi_full_instruction * inst = emit_data->inst;
char intrinsic_name[32];
-   char coords_type[8];
 
if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
load_emit_memory(ctx, emit_data);
@@ -3770,11 +3781,9 @@ static void load_emit(
emit_data->args, emit_data->arg_count,
LLVMReadOnlyAttribute);
} else {
-   build_int_type_name(LLVMTypeOf(emit_data->args[0]),
-   coords_type, sizeof(coords_type));
-
-   snprintf(intrinsic_name, sizeof(intrinsic_name),
-"llvm.amdgcn.image.load.%s", coords_type);
+   get_image_intr_name("llvm.amdgcn.image.load",
+   LLVMTypeOf(emit_data->args[0]),
+   intrinsic_name, sizeof(intrinsic_name));
 
emit_data->output[emit_data->chan] =
lp_build_intrinsic(
@@ -3951,7 +3960,6 @@ static void store_emit(
const struct tgsi_full_instruction * inst = emit_data->inst;
unsigned target = inst->Memory.Texture;
char intrinsic_name[32];
-   char coords_type[8];
 
if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
store_emit_memory(ctx, emit_data);
@@ -3972,10 +3980,9 @@ static void store_emit(
emit_data->dst_type, emit_data->args,
emit_data->arg_count, 0);
} else {
-   build_int_type_name(LLVMTypeOf(emit_data->args[1]),
-   coords_type, sizeof(coords_type));
-   snprintf(intrinsic_name, sizeof(intrinsic_name),
-"llvm.amdgcn.image.store.%s", coords_type);
+   get_image_intr_name("llvm.amdgcn.image.store",
+   LLVMTypeOf(emit_data->args[1]),
+   intrinsic_name, sizeof(intrinsic_name));
 
emit_data->output[emit_data->chan] =
lp_build_intrinsic(
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] [RFC] radv: add scratch support for spilling.

2016-10-11 Thread Tom Stellard

On Tue, Oct 11, 2016 at 03:21:24PM +0200, Nicolai Hähnle wrote:
> On 11.10.2016 07:36, Dave Airlie wrote:
> > On 11 October 2016 at 12:13, Dave Airlie  wrote:
> >> On 11 October 2016 at 11:42, Dave Airlie  wrote:
> >>> On 11 October 2016 at 05:50, Dave Airlie  wrote:
>  On 10 October 2016 at 21:45, Arsenault, Matthew
>   wrote:
> > I don't like adding explicit IR arguments for ABI arguments, especially 
> > this
> > one. Adding a special case for the first index feels dirty. The rest of 
> > llvm
> > also won't be aware of the specialness of the argument. It would be
> > problematic because bugpoint would eliminate the unused argument and 
> > then
> > codegen would have to fail in some way when the argument is missing
> 
> That's a good point, but is there an alternative without burning two 
> userdata SGPRs?
> 
> One possibility is to define an ABI that says:
> 
> 1. SGPR0/1 points to an extra data region; it is reserved independently 
> from the shader arguments.
> 2. The first 64 bits of that extra data region point to the scratch buffer.
> 3. The main shader code can retrieve SGPR0/1 using an intrinsic.
> 
> This can be made to look somewhat similar to what HSA does.
> 

What if we stored all shader inputs in the 'extra data region', with an
ABI that defined fixed offsets in the 'extra data region' for each
input?

Then as an optimization we could have the compiler map the values that
it needed from the 'extra data region' into user sgprs and communicate
this back to the driver.

This gets us something that works very quickly and still allows us to do
optimizations in the future.

-Tom

> 
>  We should just hardcode the behaviour and switch both radv/radeonsi
>  over in one go?
> 
>  I'll try and code up, using the first 64-bits of the first buffer
>  pointed to by userdata 0/1,
>  to store things.
> >>>
> >>> I've looked at doing a dword fetch from the first two words of the 0/1 
> >>> userdata,
> >>>
> >>> It's not optimal for vulkan unfortunately, since the idea I had was per 
> >>> command
> >>> buffer I just allocate one scratch buffer of the size required at the 
> >>> end, and
> >>> patch it in at the start of the command buffer. However in the first
> >>> slot I was going
> >>> to use the push constants/dynamic buffer to store the value, however it 
> >>> looks
> >>> like I need to keep a list of every one of these buffers I emit, and
> >>> backpatch them
> >>> all. It might not be too insane, just a slight bump in the keeping it 
> >>> simple.
> >>
> >> I'm probably losing the plot here, but I'm considering a double indirection,
> >>
> >> we load the 64-bit address from the first two dwords, then load the
> >> 64-bits dword
> >> from that address to get the value.
> >>
> >> This saves me allocating scratch bo's for secondary command buffers,
> >> and also having to allocate ever increasing scratch bo's as shaders that
> >> need more scratch get bound to the pipeline.
> >> I'm not sure how much of an effect this should have for GL though.
> >
> > I've posted a patch to this effect to the llvm phabricator.
> >
> > It definitely is cleaner for the radv driver.
> 
> I still think it would be nice to have the level of indirection or 
> whatever one wants to call it as a function attribute. This would allow 
> you to change your mind about e.g. just sticking the scratch pointer 
> directly into SGPR0/1. radeonsi and radv don't have to be identical in 
> that regard.
> 
> Cheers
> Nicolai
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/2] radv: Fix incorrect comment

2016-10-13 Thread Tom Stellard

---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e6ff7c8..9c764c7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2373,8 +2373,8 @@ static void visit_image_store(struct nir_to_llvm_context 
*ctx,
bool da = glsl_sampler_type_is_array(type) ||
  glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
 
-   params[0] = get_src(ctx, instr->src[2]); /* coords */
-   params[1] = get_image_coords(ctx, instr, false);
+   params[0] = get_src(ctx, instr->src[2]);
+   params[1] = get_image_coords(ctx, instr, false); /* coords */
params[2] = get_sampler_desc(ctx, instr->variables[0], 
DESC_IMAGE);
params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
params[4] = i1false;  /* r128 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/2] radv: Use new image load/store intrinsic signatures

2016-10-13 Thread Tom Stellard

These were changed in LLVM r284024.
---
 src/amd/common/ac_nir_to_llvm.c | 131 
 1 file changed, 107 insertions(+), 24 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9c764c7..4fba7d3 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2296,13 +2296,73 @@ static LLVMValueRef get_image_coords(struct 
nir_to_llvm_context *ctx,
return res;
 }
 
+static void build_type_name_for_intr(
+LLVMTypeRef type,
+char *buf, unsigned bufsize)
+{
+LLVMTypeRef elem_type = type;
+
+assert(bufsize >= 8);
+
+if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+int ret = snprintf(buf, bufsize, "v%u",
+LLVMGetVectorSize(type));
+if (ret < 0) {
+char *type_name = LLVMPrintTypeToString(type);
+fprintf(stderr, "Error building type name for: %s\n",
+type_name);
+return;
+}
+elem_type = LLVMGetElementType(type);
+buf += ret;
+bufsize -= ret;
+}
+switch (LLVMGetTypeKind(elem_type)) {
+default: break;
+case LLVMIntegerTypeKind:
+snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+break;
+case LLVMFloatTypeKind:
+snprintf(buf, bufsize, "f32");
+break;
+case LLVMDoubleTypeKind:
+snprintf(buf, bufsize, "f64");
+break;
+}
+}
+
+static void get_image_intr_name(const char *base_name,
+LLVMTypeRef data_type,
+LLVMTypeRef coords_type,
+LLVMTypeRef rsrc_type,
+char *out_name, unsigned out_len)
+{
+char coords_type_name[8];
+
+build_type_name_for_intr(coords_type, coords_type_name,
+sizeof(coords_type_name));
+
+if (HAVE_LLVM <= 0x0309) {
+snprintf(out_name, out_len, "%s.%s", base_name, 
coords_type_name);
+} else {
+char data_type_name[8];
+char rsrc_type_name[8];
+
+build_type_name_for_intr(data_type, data_type_name,
+sizeof(data_type_name));
+build_type_name_for_intr(rsrc_type, rsrc_type_name,
+sizeof(rsrc_type_name));
+snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
+ data_type_name, coords_type_name, rsrc_type_name);
+}
+}
+
 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
 nir_intrinsic_instr *instr)
 {
LLVMValueRef params[7];
LLVMValueRef res;
-   char intrinsic_name[32];
-   char coords_type[8];
+   char intrinsic_name[64];
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = var->type;
if(instr->variables[0]->deref.child)
@@ -2322,23 +2382,35 @@ static LLVMValueRef visit_image_load(struct 
nir_to_llvm_context *ctx,
res = trim_vector(ctx, res, instr->dest.ssa.num_components);
res = to_integer(ctx, res);
} else {
-   bool da = glsl_sampler_type_is_array(type) ||
- glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
+   bool is_da = glsl_sampler_type_is_array(type) ||
+glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_CUBE;
bool add_frag_pos = glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_SUBPASS;
+   LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
+   LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
+   LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
 
params[0] = get_image_coords(ctx, instr, add_frag_pos);
params[1] = get_sampler_desc(ctx, instr->variables[0], 
DESC_IMAGE);
params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
-   params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
-   params[4] = da ? ctx->i32one : ctx->i32zero; /* da */
-   params[5] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
-   params[6] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
+   if (HAVE_LLVM <= 0x0309) {
+   params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
+   params[4] = da;
+   params[5] = glc;
+   params[6] = slc;
+   } else {
+   LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
+   params[3] = glc;
+

[Mesa-dev] [PATCH 1/2] radv: Fix incorrect comment

2016-10-13 Thread Tom Stellard

---
 src/amd/common/ac_nir_to_llvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index e6ff7c8..9c764c7 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2373,8 +2373,8 @@ static void visit_image_store(struct nir_to_llvm_context 
*ctx,
bool da = glsl_sampler_type_is_array(type) ||
  glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
 
-   params[0] = get_src(ctx, instr->src[2]); /* coords */
-   params[1] = get_image_coords(ctx, instr, false);
+   params[0] = get_src(ctx, instr->src[2]);
+   params[1] = get_image_coords(ctx, instr, false); /* coords */
params[2] = get_sampler_desc(ctx, instr->variables[0], 
DESC_IMAGE);
params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
params[4] = i1false;  /* r128 */
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/2] radv: Use new image load/store intrinsic signatures

2016-10-13 Thread Tom Stellard

On Thu, Oct 13, 2016 at 07:20:30PM +0200, Kai Wasserbäch wrote:
> Dear Tom,
> just FYI: this fails to apply on top of master
> (761388a0eb586b1dcaec063ee561056ed132dc1a). git am chokes on the
> visit_image_store() hunk for me. Attached is a "refreshed" version, which
> applies for me. I hope I didn't butcher anything inadvertently.
> 

Hi,

I just sent rebased patches.  Can you try those?

-Tom

> Cheers,
> Kai
> 
> 
> Tom Stellard wrote on 13.10.2016 17:21:
> > These were changed in LLVM r284024.
> > ---
> >  src/amd/common/ac_nir_to_llvm.c | 131 
> > 
> >  1 file changed, 107 insertions(+), 24 deletions(-)
> > 
> > diff --git a/src/amd/common/ac_nir_to_llvm.c 
> > b/src/amd/common/ac_nir_to_llvm.c
> > index 9c764c7..4fba7d3 100644
> > --- a/src/amd/common/ac_nir_to_llvm.c
> > +++ b/src/amd/common/ac_nir_to_llvm.c
> > @@ -2296,13 +2296,73 @@ static LLVMValueRef get_image_coords(struct 
> > nir_to_llvm_context *ctx,
> > return res;
> >  }
> >  
> > +static void build_type_name_for_intr(
> > +LLVMTypeRef type,
> > +char *buf, unsigned bufsize)
> > +{
> > +LLVMTypeRef elem_type = type;
> > +
> > +assert(bufsize >= 8);
> > +
> > +if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
> > +int ret = snprintf(buf, bufsize, "v%u",
> > +LLVMGetVectorSize(type));
> > +if (ret < 0) {
> > +char *type_name = LLVMPrintTypeToString(type);
> > +fprintf(stderr, "Error building type name for: 
> > %s\n",
> > +type_name);
> > +return;
> > +}
> > +elem_type = LLVMGetElementType(type);
> > +buf += ret;
> > +bufsize -= ret;
> > +}
> > +switch (LLVMGetTypeKind(elem_type)) {
> > +default: break;
> > +case LLVMIntegerTypeKind:
> > +snprintf(buf, bufsize, "i%d", 
> > LLVMGetIntTypeWidth(elem_type));
> > +break;
> > +case LLVMFloatTypeKind:
> > +snprintf(buf, bufsize, "f32");
> > +break;
> > +case LLVMDoubleTypeKind:
> > +snprintf(buf, bufsize, "f64");
> > +break;
> > +}
> > +}
> > +
> > +static void get_image_intr_name(const char *base_name,
> > +LLVMTypeRef data_type,
> > +LLVMTypeRef coords_type,
> > +LLVMTypeRef rsrc_type,
> > +char *out_name, unsigned out_len)
> > +{
> > +char coords_type_name[8];
> > +
> > +build_type_name_for_intr(coords_type, coords_type_name,
> > +sizeof(coords_type_name));
> > +
> > +if (HAVE_LLVM <= 0x0309) {
> > +snprintf(out_name, out_len, "%s.%s", base_name, 
> > coords_type_name);
> > +} else {
> > +char data_type_name[8];
> > +char rsrc_type_name[8];
> > +
> > +build_type_name_for_intr(data_type, data_type_name,
> > +sizeof(data_type_name));
> > +build_type_name_for_intr(rsrc_type, rsrc_type_name,
> > +sizeof(rsrc_type_name));
> > +snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
> > + data_type_name, coords_type_name, rsrc_type_name);
> > +}
> > +}
> > +
> >  static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
> >  nir_intrinsic_instr *instr)
> >  {
> > LLVMValueRef params[7];
> > LLVMValueRef res;
> > -   char intrinsic_name[32];
> > -   char coords_type[8];
> > +   char intrinsic_name[64];
> > const nir_variable *var = instr->variables[0]->var;
> > const struct glsl_type *type = var->type;
> > if(instr->variables[0]->deref.child)
> > @@ -2322,23 +2382,35 @@ static LLVMValueRef visit_image_load(struct 
> > nir_to_llvm_context *ctx,
> > res = trim_vector(ctx, res, instr->dest.ssa.num_components);
> > res = to_integer(ctx, res);

[Mesa-dev] [PATCH 2/2] radv: Use new image load/store intrinsic signatures v2

2016-10-13 Thread Tom Stellard

These were changed in LLVM r284024.

v2:
  - Only use float types for vdata of llvm.amdgcn.image.store.  LLVM doesn't
support integer types for this intrinsic.
---
 src/amd/common/ac_nir_to_llvm.c | 133 
 1 file changed, 108 insertions(+), 25 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 9c764c7..56814ec 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -2296,13 +2296,73 @@ static LLVMValueRef get_image_coords(struct 
nir_to_llvm_context *ctx,
return res;
 }
 
+static void build_type_name_for_intr(
+LLVMTypeRef type,
+char *buf, unsigned bufsize)
+{
+LLVMTypeRef elem_type = type;
+
+assert(bufsize >= 8);
+
+if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+int ret = snprintf(buf, bufsize, "v%u",
+LLVMGetVectorSize(type));
+if (ret < 0) {
+char *type_name = LLVMPrintTypeToString(type);
+fprintf(stderr, "Error building type name for: %s\n",
+type_name);
+return;
+}
+elem_type = LLVMGetElementType(type);
+buf += ret;
+bufsize -= ret;
+}
+switch (LLVMGetTypeKind(elem_type)) {
+default: break;
+case LLVMIntegerTypeKind:
+snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+break;
+case LLVMFloatTypeKind:
+snprintf(buf, bufsize, "f32");
+break;
+case LLVMDoubleTypeKind:
+snprintf(buf, bufsize, "f64");
+break;
+}
+}
+
+static void get_image_intr_name(const char *base_name,
+LLVMTypeRef data_type,
+LLVMTypeRef coords_type,
+LLVMTypeRef rsrc_type,
+char *out_name, unsigned out_len)
+{
+char coords_type_name[8];
+
+build_type_name_for_intr(coords_type, coords_type_name,
+sizeof(coords_type_name));
+
+if (HAVE_LLVM <= 0x0309) {
+snprintf(out_name, out_len, "%s.%s", base_name, 
coords_type_name);
+} else {
+char data_type_name[8];
+char rsrc_type_name[8];
+
+build_type_name_for_intr(data_type, data_type_name,
+sizeof(data_type_name));
+build_type_name_for_intr(rsrc_type, rsrc_type_name,
+sizeof(rsrc_type_name));
+snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
+ data_type_name, coords_type_name, rsrc_type_name);
+}
+}
+
 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
 nir_intrinsic_instr *instr)
 {
LLVMValueRef params[7];
LLVMValueRef res;
-   char intrinsic_name[32];
-   char coords_type[8];
+   char intrinsic_name[64];
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = var->type;
if(instr->variables[0]->deref.child)
@@ -2322,23 +2382,35 @@ static LLVMValueRef visit_image_load(struct 
nir_to_llvm_context *ctx,
res = trim_vector(ctx, res, instr->dest.ssa.num_components);
res = to_integer(ctx, res);
} else {
-   bool da = glsl_sampler_type_is_array(type) ||
- glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
+   bool is_da = glsl_sampler_type_is_array(type) ||
+glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_CUBE;
bool add_frag_pos = glsl_get_sampler_dim(type) == 
GLSL_SAMPLER_DIM_SUBPASS;
+   LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
+   LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
+   LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
 
params[0] = get_image_coords(ctx, instr, add_frag_pos);
params[1] = get_sampler_desc(ctx, instr->variables[0], 
DESC_IMAGE);
params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
-   params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
-   params[4] = da ? ctx->i32one : ctx->i32zero; /* da */
-   params[5] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
-   params[6] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
+   if (HAVE_LLVM <= 0x0309) {
+   params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
+   params[4] = da;
+   params[5] = glc;
+   params[6] = slc;
+   } else {
+

Re: [Mesa-dev] [PATCH 08/19] gallium/radeon: clean up emit_declaration for temporaries

2016-08-10 Thread Tom Stellard

On Tue, Aug 09, 2016 at 12:36:37PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
> In the alloca'd array case, no longer create redundant and unused allocas
> for the individual elements; create getelementptrs instead.

Reviewed-by: Tom Stellard 
> ---
>  .../drivers/radeon/radeon_setup_tgsi_llvm.c| 27 
> ++
>  1 file changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d75311e..41f24d3 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -408,81 +408,90 @@ static LLVMValueRef si_build_alloca_undef(struct 
> gallivm_state *gallivm,
>   LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
>   LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
>   return ptr;
>  }
>  
>  static void emit_declaration(struct lp_build_tgsi_context *bld_base,
>const struct tgsi_full_declaration *decl)
>  {
>   struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
>   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
> - unsigned first, last, i, idx;
> + unsigned first, last, i;
>   switch(decl->Declaration.File) {
>   case TGSI_FILE_ADDRESS:
>   {
>unsigned idx;
>   for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>   unsigned chan;
>   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
>ctx->soa.addr[idx][chan] = 
> si_build_alloca_undef(
>   &ctx->gallivm,
>   ctx->soa.bld_base.uint_bld.elem_type, 
> "");
>   }
>   }
>   break;
>   }
>  
>   case TGSI_FILE_TEMPORARY:
>   {
> + LLVMValueRef array_alloca = NULL;
>   unsigned decl_size;
>   first = decl->Range.First;
>   last = decl->Range.Last;
>   decl_size = 4 * ((last - first) + 1);
>   if (decl->Declaration.Array) {
>   unsigned id = decl->Array.ArrayID - 1;
>   if (!ctx->arrays) {
>   int size = 
> bld_base->info->array_max[TGSI_FILE_TEMPORARY];
>   ctx->arrays = CALLOC(size, 
> sizeof(ctx->arrays[0]));
> - for (i = 0; i < size; ++i) {
> - assert(!ctx->arrays[i].alloca);}
>   }
>  
>   ctx->arrays[id].range = decl->Range;
>  
>   /* If the array is more than 16 elements (each element
>* is 32-bits), then store it in a vector.  Storing the
>* array in a vector will causes the compiler to store
>* the array in registers and access it using indirect
>* addressing.  16 is number of vector elements that
>* LLVM will store in a register.
>* FIXME: We shouldn't need to do this.  LLVM should be
>* smart enough to promote allocas int registers when
>* profitable.
>*/
>   if (decl_size > 16) {
> - ctx->arrays[id].alloca = 
> LLVMBuildAlloca(builder,
> + array_alloca = LLVMBuildAlloca(builder,
>   LLVMArrayType(bld_base->base.vec_type, 
> decl_size),"array");
> + ctx->arrays[id].alloca = array_alloca;
>   }
>   }
> - first = decl->Range.First;
> - last = decl->Range.Last;
> +
>   if (!ctx->temps_count) {
>   ctx->temps_count = 
> bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
>   ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
> ctx->temps_count * sizeof(LLVMValueRef));
>   }
> - for (idx = first; idx <= last; idx++) {
> - for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
> - ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
> + if (!array_alloca) {
> + for (i = 0; i < decl_size; ++i) {
> + ctx->temps[first * TGSI_NUM_CHANN

Re: [Mesa-dev] [PATCH 09/19] gallium/radeon: simplify radeon_llvm_emit_fetch for direct array addressing

2016-08-10 Thread Tom Stellard

On Tue, Aug 09, 2016 at 12:36:38PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
> We can use the pointer stored in the temps array directly.

Reviewed-by: Tom Stellard 
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 41f24d3..e084248 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -352,25 +352,20 @@ LLVMValueRef radeon_llvm_emit_fetch(struct 
> lp_build_tgsi_context *bld_base,
>   case TGSI_FILE_TEMPORARY:
>   if (reg->Register.Index >= ctx->temps_count)
>   return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
>   ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + 
> swizzle];
>   if (tgsi_type_is_64bit(type)) {
>   ptr2 = ctx->temps[reg->Register.Index * 
> TGSI_NUM_CHANNELS + swizzle + 1];
>   return radeon_llvm_emit_fetch_64bit(bld_base, type,
>LLVMBuildLoad(builder, ptr, 
> ""),
>LLVMBuildLoad(builder, ptr2, 
> ""));
>   }
> - LLVMValueRef array = get_alloca_for_array(bld_base, 
> reg->Register.File, reg->Register.Index);
> - if (array) {
> - return bitcast(bld_base, type, 
> load_value_from_array(bld_base, reg->Register.File, type,
> - swizzle, reg->Register.Index, NULL));
> - }
>   result = LLVMBuildLoad(builder, ptr, "");
>   break;
>  
>   case TGSI_FILE_OUTPUT:
>   ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
>   if (tgsi_type_is_64bit(type)) {
>   ptr2 = lp_get_output_ptr(bld, reg->Register.Index, 
> swizzle + 1);
>   return radeon_llvm_emit_fetch_64bit(bld_base, type,
>LLVMBuildLoad(builder, ptr, 
> ""),
>LLVMBuildLoad(builder, ptr2, 
> ""));
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 10/19] gallium/radeon: simplify radeon_llvm_emit_store for direct array addressing

2016-08-10 Thread Tom Stellard

On Tue, Aug 09, 2016 at 12:36:39PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
> We can use the pointer stored in the temps array directly.

Reviewed-by: Tom Stellard 
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 7 ---
>  1 file changed, 7 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index e084248..7b96a58 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -624,30 +624,23 @@ void radeon_llvm_emit_store(struct 
> lp_build_tgsi_context *bld_base,
>   } else {
>   switch(reg->Register.File) {
>   case TGSI_FILE_OUTPUT:
>   temp_ptr = 
> bld->outputs[reg->Register.Index][chan_index];
>   if (tgsi_type_is_64bit(dtype))
>   temp_ptr2 = 
> bld->outputs[reg->Register.Index][chan_index + 1];
>   break;
>  
>   case TGSI_FILE_TEMPORARY:
>   {
> - LLVMValueRef array;
>   if (reg->Register.Index >= ctx->temps_count)
>   continue;
> - array = get_alloca_for_array(bld_base, 
> reg->Register.File, reg->Register.Index);
>  
> - if (array) {
> - store_value_to_array(bld_base, value, 
> reg->Register.File, chan_index, reg->Register.Index,
> - NULL);
> - continue;
> - }
>   temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * 
> reg->Register.Index + chan_index];
>   if (tgsi_type_is_64bit(dtype))
>   temp_ptr2 = ctx->temps[ 
> TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
>  
>   break;
>   }
>   default:
>   return;
>   }
>   if (!tgsi_type_is_64bit(dtype))
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/19] gallium/radeon: more descriptive names for LLVM temporaries in debug builds

2016-08-10 Thread Tom Stellard

On Tue, Aug 09, 2016 at 12:36:40PM +0200, Nicolai Hähnle wrote:
> From: Nicolai Hähnle 
> 
This is a great idea.

Reviewed-by: Tom Stellard 

> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 14 --
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index 7b96a58..22ff18e 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -31,20 +31,21 @@
>  #include "gallivm/lp_bld_init.h"
>  #include "gallivm/lp_bld_intr.h"
>  #include "gallivm/lp_bld_misc.h"
>  #include "gallivm/lp_bld_swizzle.h"
>  #include "tgsi/tgsi_info.h"
>  #include "tgsi/tgsi_parse.h"
>  #include "util/u_math.h"
>  #include "util/u_memory.h"
>  #include "util/u_debug.h"
>  
> +#include 
>  #include 
>  #include 
>  
>  LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
> enum tgsi_opcode_type type)
>  {
>   LLVMContextRef ctx = bld_base->base.gallivm->context;
>  
>   switch (type) {
>   case TGSI_TYPE_UNSIGNED:
> @@ -421,20 +422,21 @@ static void emit_declaration(struct 
> lp_build_tgsi_context *bld_base,
>ctx->soa.addr[idx][chan] = 
> si_build_alloca_undef(
>   &ctx->gallivm,
>   ctx->soa.bld_base.uint_bld.elem_type, 
> "");
>   }
>   }
>   break;
>   }
>  
>   case TGSI_FILE_TEMPORARY:
>   {
> + char name[16] = "";
>   LLVMValueRef array_alloca = NULL;
>   unsigned decl_size;
>   first = decl->Range.First;
>   last = decl->Range.Last;
>   decl_size = 4 * ((last - first) + 1);
>   if (decl->Declaration.Array) {
>   unsigned id = decl->Array.ArrayID - 1;
>   if (!ctx->arrays) {
>   int size = 
> bld_base->info->array_max[TGSI_FILE_TEMPORARY];
>   ctx->arrays = CALLOC(size, 
> sizeof(ctx->arrays[0]));
> @@ -458,34 +460,42 @@ static void emit_declaration(struct 
> lp_build_tgsi_context *bld_base,
>   ctx->arrays[id].alloca = array_alloca;
>   }
>   }
>  
>   if (!ctx->temps_count) {
>   ctx->temps_count = 
> bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
>   ctx->temps = MALLOC(TGSI_NUM_CHANNELS * 
> ctx->temps_count * sizeof(LLVMValueRef));
>   }
>   if (!array_alloca) {
>   for (i = 0; i < decl_size; ++i) {
> +#ifdef DEBUG
> + snprintf(name, sizeof(name), "TEMP%d.%c",
> +  first + i / 4, "xyzw"[i % 4]);
> +#endif
>   ctx->temps[first * TGSI_NUM_CHANNELS + i] =
>   
> si_build_alloca_undef(bld_base->base.gallivm,
> 
> bld_base->base.vec_type,
> -   "temp");
> +   name);
>   }
>   } else {
>   LLVMValueRef idxs[2] = {
>   bld_base->uint_bld.zero,
>   NULL
>   };
>   for (i = 0; i < decl_size; ++i) {
> +#ifdef DEBUG
> + snprintf(name, sizeof(name), "TEMP%d.%c",
> +  first + i / 4, "xyzw"[i % 4]);
> +#endif
>   idxs[1] = 
> lp_build_const_int32(bld_base->base.gallivm, i);
>   ctx->temps[first * TGSI_NUM_CHANNELS + i] =
> - LLVMBuildGEP(builder, array_alloca, 
> idxs, 2, "temp");
> + LLVMBuildGEP(builder, array_alloca, 
> idxs, 2, name);
>   }
>   }
>   break;
>   }
>   case TGSI_FILE_INPUT:
>   {
>   unsigned idx;
>   for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
>   if (ctx->load_input)
>   ctx->load_input(ctx, idx, decl);
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: initialize and finalize the LLVM function pass manager

2016-08-18 Thread Tom Stellard

On Fri, Aug 12, 2016 at 01:26:08AM +0200, Marek Olšák wrote:
> From: Marek Olšák 
> 
> we should do that allegedly

Reviewed-by: Tom Stellard 
> ---
>  src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c 
> b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> index d75311e..e04e26a 100644
> --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
> @@ -1918,21 +1918,23 @@ void radeon_llvm_finalize_module(struct 
> radeon_llvm_context *ctx)
>   LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
>  
>   /* Add some optimization passes */
>   LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
>   LLVMAddLICMPass(gallivm->passmgr);
>   LLVMAddAggressiveDCEPass(gallivm->passmgr);
>   LLVMAddCFGSimplificationPass(gallivm->passmgr);
>   LLVMAddInstructionCombiningPass(gallivm->passmgr);
>  
>   /* Run the pass */
> + LLVMInitializeFunctionPassManager(gallivm->passmgr);
>   LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
> + LLVMFinalizeFunctionPassManager(gallivm->passmgr);
>  
>   LLVMDisposeBuilder(gallivm->builder);
>   LLVMDisposePassManager(gallivm->passmgr);
>   gallivm_dispose_target_library_info(target_library_info);
>  }
>  
>  void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
>  {
>   LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
>   LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
> -- 
> 2.7.4
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] radeonsi: Don't use global variables for tess lds

2016-08-26 Thread Tom Stellard

We were allocating global variables for the maximum LDS size
which made the compiler think we were using all of LDS, which
isn't the case.
---
 src/gallium/drivers/radeonsi/si_shader.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 64c367e..5d972cb 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5420,16 +5420,13 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
 static void declare_tess_lds(struct si_shader_context *ctx)
 {
struct gallivm_state *gallivm = &ctx->radeon_bld.gallivm;
-   LLVMTypeRef i32 = ctx->radeon_bld.soa.bld_base.uint_bld.elem_type;
-   unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
+   struct lp_build_tgsi_context *bld_base = &ctx->radeon_bld.soa.bld_base;
+   struct lp_build_context *uint = &bld_base->uint_bld;
 
-   /* The actual size is computed outside of the shader to reduce
-* the number of shader variants. */
-   ctx->lds =
-   LLVMAddGlobalInAddressSpace(gallivm->module,
-   LLVMArrayType(i32, lds_size / 4),
-   "tess_lds",
-   LOCAL_ADDR_SPACE);
+   unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
+   ctx->lds = LLVMBuildIntToPtr(gallivm->builder, uint->zero,
+   LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), 
LOCAL_ADDR_SPACE),
+   "tess_lds");
 }
 
 static void create_function(struct si_shader_context *ctx)
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] nir/radv: workaround broken kill support

2016-11-02 Thread Tom Stellard

On Wed, Nov 02, 2016 at 11:26:08AM +1000, Dave Airlie wrote:
> So it appears at least the LLVM 3.9 backend can get confused
> when it gets hit with if (cond) discard type constructs, and we
> have a GLSL optimisation to convert this to discard_if, so I've
> ported that to NIR, and enabled it for radv. It fixes the hangs
> and the tests here.
> 

Does this work correctly with llvm master?

-Tom


> Dave.
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] gallivm: Fix build after removal of deprecated attribute API

2016-11-07 Thread Tom Stellard

---

Build tested only so far.

 src/gallium/auxiliary/draw/draw_llvm.c|  6 +-
 src/gallium/auxiliary/gallivm/lp_bld_intr.c   | 48 +++-
 src/gallium/auxiliary/gallivm/lp_bld_intr.h   | 13 -
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  4 +-
 src/gallium/drivers/radeonsi/si_shader.c  | 69 ---
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 24 
 6 files changed, 112 insertions(+), 52 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 5b4e2a1..5d87318 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1568,8 +1568,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
for (i = 0; i < num_arg_types; ++i)
   if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant_func, i),
-  LLVMNoAliasAttribute);
+ lp_add_function_attr(variant_func, i + 1, "noalias", 7);
 
context_ptr   = LLVMGetParam(variant_func, 0);
io_ptr= LLVMGetParam(variant_func, 1);
@@ -2193,8 +2192,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
 
for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
   if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant_func, i),
-  LLVMNoAliasAttribute);
+ lp_add_function_attr(variant_func, i + 1, "noalias", 7);
 
context_ptr   = LLVMGetParam(variant_func, 0);
input_array   = LLVMGetParam(variant_func, 1);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index f12e735..55afe6d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -120,13 +120,53 @@ lp_declare_intrinsic(LLVMModuleRef module,
 }
 
 
+#if HAVE_LLVM < 0x0400
+static LLVMAttribute str_to_attr(const char *attr_name, unsigned attr_len)
+{
+   if (!strncmp("alwaysinline", attr_name, attr_len)) {
+  return LLVMAlwaysInlineAttribute;
+   } else if (!strncmp("byval", attr_name, attr_len)) {
+  return LLVMByValAttribute;
+   } else if (!strncmp("inreg", attr_name, attr_len)) {
+  return LLVMInRegAttribute;
+   } else if (!strncmp("noalias", attr_name, attr_len)) {
+  return LLVMNoAlliasAttribute;
+   } else if (!strncmp("readnone", attr_name, attr_len)) {
+  return LLVMReadNoneAttribute;
+   } else if (!strncmp("readonly", attr_name, attr_len)) {
+  return LLVMReadOnlyAttribute;
+   } else {
+  _debug_printf("Unhandled function attribute: %s\n", attr_name);
+  return 0;
+   }
+}
+#endif
+
+void
+lp_add_function_attr(LLVMValueRef function,
+ unsigned attr_idx,
+ const char *attr_name,
+ unsigned attr_len)
+{
+
+#if HAVE_LLVM < 0x0400
+   LLVMAttribute attr = str_to_attr(attr_name, attr_len);
+   LLVMAddFunctionAttr(function, attr);
+#else
+   LLVMContextRef context = 
LLVMGetModuleContext(LLVMGetGlobalParent(function));
+   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, attr_len);
+   LLVMAttributeRef attr = LLVMCreateEnumAttribute(context, kind_id, 0);
+   LLVMAddAttributeAtIndex(function, attr_idx, attr);
+#endif
+}
+
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
-   LLVMAttribute attr)
+   const char *attr_str)
 {
LLVMModuleRef module = 
LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
LLVMValueRef function;
@@ -145,10 +185,14 @@ lp_build_intrinsic(LLVMBuilderRef builder,
 
   function = lp_declare_intrinsic(module, name, ret_type, arg_types, 
num_args);
 
+  if (attr_str) {
+ lp_add_function_attr(function, -1, attr_str, sizeof(attr_str));
+  }
+
   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
* Set it for all intrinsics.
*/
-  LLVMAddFunctionAttr(function, attr | LLVMNoUnwindAttribute);
+  lp_add_function_attr(function, -1, "nounwind", 8);
 
   if (gallivm_debug & GALLIVM_DEBUG_IR) {
  lp_debug_dump_value(function);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 7d80ac2..b4558dc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -60,13 +60,24 @@ lp_declare_intrinsic(LLVMModuleRef module,
  LLVMTypeRef *arg_types,
  unsigned num_args);
 
+void
+lp_remove_attr(LLVMValueRef value,
+   const char *attr_name,
+   unsigned att

[Mesa-dev] [PATCH] gallivm: Fix build after removal of deprecated attribute API v2

2016-11-07 Thread Tom Stellard

v2:
  Fix adding parameter attributes with LLVM < 4.0.
---
 src/gallium/auxiliary/draw/draw_llvm.c|  6 +-
 src/gallium/auxiliary/gallivm/lp_bld_intr.c   | 52 -
 src/gallium/auxiliary/gallivm/lp_bld_intr.h   | 13 -
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |  4 +-
 src/gallium/drivers/radeonsi/si_shader.c  | 69 ---
 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c | 24 
 6 files changed, 116 insertions(+), 52 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_llvm.c 
b/src/gallium/auxiliary/draw/draw_llvm.c
index 5b4e2a1..5d87318 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -1568,8 +1568,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct 
draw_llvm_variant *variant,
LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
for (i = 0; i < num_arg_types; ++i)
   if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant_func, i),
-  LLVMNoAliasAttribute);
+ lp_add_function_attr(variant_func, i + 1, "noalias", 7);
 
context_ptr   = LLVMGetParam(variant_func, 0);
io_ptr= LLVMGetParam(variant_func, 1);
@@ -2193,8 +2192,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
 
for (i = 0; i < ARRAY_SIZE(arg_types); ++i)
   if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(variant_func, i),
-  LLVMNoAliasAttribute);
+ lp_add_function_attr(variant_func, i + 1, "noalias", 7);
 
context_ptr   = LLVMGetParam(variant_func, 0);
input_array   = LLVMGetParam(variant_func, 1);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index f12e735..401e9a2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -120,13 +120,57 @@ lp_declare_intrinsic(LLVMModuleRef module,
 }
 
 
+#if HAVE_LLVM < 0x0400
+static LLVMAttribute str_to_attr(const char *attr_name, unsigned attr_len)
+{
+   if (!strncmp("alwaysinline", attr_name, attr_len)) {
+  return LLVMAlwaysInlineAttribute;
+   } else if (!strncmp("byval", attr_name, attr_len)) {
+  return LLVMByValAttribute;
+   } else if (!strncmp("inreg", attr_name, attr_len)) {
+  return LLVMInRegAttribute;
+   } else if (!strncmp("noalias", attr_name, attr_len)) {
+  return LLVMNoAlliasAttribute;
+   } else if (!strncmp("readnone", attr_name, attr_len)) {
+  return LLVMReadNoneAttribute;
+   } else if (!strncmp("readonly", attr_name, attr_len)) {
+  return LLVMReadOnlyAttribute;
+   } else {
+  _debug_printf("Unhandled function attribute: %s\n", attr_name);
+  return 0;
+   }
+}
+#endif
+
+void
+lp_add_function_attr(LLVMValueRef function,
+ int attr_idx,
+ const char *attr_name,
+ unsigned attr_len)
+{
+
+#if HAVE_LLVM < 0x0400
+   LLVMAttribute attr = str_to_attr(attr_name, attr_len);
+   if (attr_idx == -1) {
+  LLVMAddFunctionAttr(function, attr);
+   } else {
+  LLVMAddAttribute(LLVMGetParam(function, attr_idx), attr);
+   }
+#else
+   LLVMContextRef context = 
LLVMGetModuleContext(LLVMGetGlobalParent(function));
+   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, attr_len);
+   LLVMAttributeRef attr = LLVMCreateEnumAttribute(context, kind_id, 0);
+   LLVMAddAttributeAtIndex(function, attr_idx, attr);
+#endif
+}
+
 LLVMValueRef
 lp_build_intrinsic(LLVMBuilderRef builder,
const char *name,
LLVMTypeRef ret_type,
LLVMValueRef *args,
unsigned num_args,
-   LLVMAttribute attr)
+   const char *attr_str)
 {
LLVMModuleRef module = 
LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
LLVMValueRef function;
@@ -145,10 +189,14 @@ lp_build_intrinsic(LLVMBuilderRef builder,
 
   function = lp_declare_intrinsic(module, name, ret_type, arg_types, 
num_args);
 
+  if (attr_str) {
+ lp_add_function_attr(function, -1, attr_str, sizeof(attr_str));
+  }
+
   /* NoUnwind indicates that the intrinsic never raises a C++ exception.
* Set it for all intrinsics.
*/
-  LLVMAddFunctionAttr(function, attr | LLVMNoUnwindAttribute);
+  lp_add_function_attr(function, -1, "nounwind", 8);
 
   if (gallivm_debug & GALLIVM_DEBUG_IR) {
  lp_debug_dump_value(function);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h 
b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 7d80ac2..a058de4 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -60,13 +60,24 @@ lp_declare_intrinsic(LLVMModuleRef module,
  LLVMTypeRef *arg_types,

[Mesa-dev] [PATCH 2/2] llvmpipe: Fix build after removal of deprecated attribute API v2

2016-11-09 Thread Tom Stellard

From: Aaron Watry 

Applies on top of v3 of Tom's gallivm change.

v2:
  - Tom Stellard: Use enums instead of strings.

Signed-off-by: Aaron Watry 
CC: Tom Stellard 
CC: Jan Vesely 
---
 src/gallium/drivers/llvmpipe/lp_state_fs.c| 2 +-
 src/gallium/drivers/llvmpipe/lp_state_setup.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c 
b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 3428eed..0910815 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -2296,7 +2296,7 @@ generate_fragment(struct llvmpipe_context *lp,
 */
for(i = 0; i < ARRAY_SIZE(arg_types); ++i)
   if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
+ lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
 
context_ptr  = LLVMGetParam(function, 0);
x= LLVMGetParam(function, 1);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c 
b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index a57e2f0..6b0df21 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -624,8 +624,7 @@ set_noalias(LLVMBuilderRef builder,
int i;
for(i = 0; i < nr_args; ++i)
   if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(function, i),
-LLVMNoAliasAttribute);
+ lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
 }
 
 static void
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1703 matches

Mail list logo