On 24 Oct 17:18, Jakub Jelinek wrote: > On Fri, Oct 24, 2014 at 07:08:44PM +0400, Ilya Verbin wrote: > > On 24 Oct 16:35, Jakub Jelinek wrote: > > > On Thu, Oct 23, 2014 at 07:41:12PM +0400, Ilya Verbin wrote: > > > > > malloc can fail, SIGSEGV in response to that is not desirable. > > > > > Can't you fall back to alloca, or use just alloca, or use alloca > > > > > with malloc fallback? > > > > > > > > I replaced it with alloca. > > > > > > There is a risk if a suid or otherwise privilege-escalated program > > > uses it and an attacker passes huge env vars. > > > Perhaps use alloca if it is <= 2KB and malloc otherwise, and in that case > > > if malloc fails, just do a fatal error? > > > > Why is this preferable to just a malloc + fatal error? > > This function is executed only once at plugin initialization, therefore no > > real performance gain could be achieved. > > Even if it is executed once, using malloc for short env vars that will be > the 99% of all cases sounds like a waste of resources to me. > You already know the strlen of the vars, so it is just a matter of > comparing that and setting a bool flag.
Done. Is it ok? Thanks, -- Ilya --- diff --git a/liboffloadmic/configure.ac b/liboffloadmic/configure.ac index fb575b3..81fae8f 100644 --- a/liboffloadmic/configure.ac +++ b/liboffloadmic/configure.ac @@ -42,6 +42,7 @@ AC_PROG_CC AC_PROG_CXX AC_CONFIG_FILES([Makefile liboffloadmic_host.spec liboffloadmic_target.spec]) AM_ENABLE_MULTILIB(, ..) +AC_CONFIG_SUBDIRS(plugin) AC_FUNC_ALLOCA AC_CHECK_HEADERS([mm_malloc.h], [], [AC_MSG_ERROR(["Couldn't find mm_malloc.h"])]) AC_CHECK_FUNCS([__secure_getenv secure_getenv]) diff --git a/liboffloadmic/plugin/Makefile.am b/liboffloadmic/plugin/Makefile.am new file mode 100644 index 0000000..0baf70d --- /dev/null +++ b/liboffloadmic/plugin/Makefile.am @@ -0,0 +1,123 @@ +# Plugin for offload execution on Intel MIC devices. +# +# Copyright (C) 2014 Free Software Foundation, Inc. +# +# Contributed by Ilya Verbin <ilya.ver...@intel.com> and +# Andrey Turetskiy <andrey.turets...@intel.com>. +# +# This file is part of the GNU OpenMP Library (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + + +AUTOMAKE_OPTIONS = foreign +ACLOCAL_AMFLAGS = -I ../.. 
-I ../../config + +# Directories +build_dir = $(top_builddir) +source_dir = $(top_srcdir) +coi_inc_dir = $(top_srcdir)/../include/coi +myo_inc_dir = $(top_srcdir)/../include/myo +libgomp_src_dir = $(top_srcdir)/../../libgomp +libgomp_dir = $(build_dir)/../../libgomp +liboffload_src_dir = $(top_srcdir)/../runtime +liboffload_dir = $(top_builddir)/.. + +# May be used by toolexeclibdir. +gcc_version := $(shell cat $(top_srcdir)/../../gcc/BASE-VER) +libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/include +# Search for main_target_image.h in these directories +target_prefix_dir = $(libdir)/gcc/$(accel_target)/$(gcc_version)$(MULTISUBDIR) +target_build_dir = $(accel_search_dir)/$(accel_target)$(MULTISUBDIR)/liboffloadmic/plugin +target_install_dir = $(accel_search_dir)/lib/gcc/$(accel_target)/$(gcc_version)$(MULTISUBDIR) + +if PLUGIN_HOST + toolexeclib_LTLIBRARIES = libgomp-plugin-intelmic.la + libgomp_plugin_intelmic_la_SOURCES = libgomp-plugin-intelmic.cpp + libgomp_plugin_intelmic_la_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=1 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_src_dir) -I$(libgomp_dir) -I$(target_prefix_dir)/include -I$(target_build_dir) -I$(target_install_dir)/include + libgomp_plugin_intelmic_la_LDFLAGS = -L$(liboffload_dir)/.libs -loffloadmic_host -version-info 1:0:0 +else # PLUGIN_TARGET + plugin_includedir = $(libsubincludedir) + plugin_include_HEADERS = main_target_image.h + AM_CPPFLAGS = $(CPPFLAGS) -DLINUX -DCOI_LIBRARY_VERSION=2 -DMYO_SUPPORT -DOFFLOAD_DEBUG=1 -DSEP_SUPPORT -DTIMING_SUPPORT -DHOST_LIBRARY=0 -I$(coi_inc_dir) -I$(myo_inc_dir) -I$(liboffload_src_dir) -I$(libgomp_dir) + AM_CXXFLAGS = $(CXXFLAGS) + AM_LDFLAGS = -L$(liboffload_dir)/.libs -L$(libgomp_dir)/.libs -loffloadmic_target -lcoi_device -lmyo-service -lgomp -rdynamic +endif + +main_target_image.h: offload_target_main + @echo -n "const int 
image_size = " > $@ + @stat -c '%s' $< >> $@ + @echo ";" >> $@ + @echo "struct MainTargetImage {" >> $@ + @echo " int64_t size;" >> $@ + @echo " char name[sizeof \"offload_target_main\"];" >> $@ + @echo " char data[image_size];" >> $@ + @echo "};" >> $@ + @echo "extern \"C\" const MainTargetImage main_target_image = {" >> $@ + @echo " image_size, \"offload_target_main\"," >> $@ + @cat $< | xxd -include >> $@ + @echo "};" >> $@ + +offload_target_main: $(liboffload_dir)/ofldbegin.o offload_target_main.o $(liboffload_dir)/ofldend.o + $(CXX) $(AM_LDFLAGS) $^ -o $@ + +offload_target_main.o: offload_target_main.cpp + $(CXX) $(AM_CXXFLAGS) $(AM_CPPFLAGS) -c $< -o $@ + +# Work around what appears to be a GNU make bug handling MAKEFLAGS +# values defined in terms of make variables, as is the case for CC and +# friends when we are called from the top level Makefile. +AM_MAKEFLAGS = \ + "AR_FLAGS=$(AR_FLAGS)" \ + "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ + "CFLAGS=$(CFLAGS)" \ + "CXXFLAGS=$(CXXFLAGS)" \ + "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ + "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ + "INSTALL=$(INSTALL)" \ + "INSTALL_DATA=$(INSTALL_DATA)" \ + "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ + "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ + "JC1FLAGS=$(JC1FLAGS)" \ + "LDFLAGS=$(LDFLAGS)" \ + "LIBCFLAGS=$(LIBCFLAGS)" \ + "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ + "MAKE=$(MAKE)" \ + "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ + "PICFLAG=$(PICFLAG)" \ + "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ + "SHELL=$(SHELL)" \ + "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ + "exec_prefix=$(exec_prefix)" \ + "infodir=$(infodir)" \ + "libdir=$(libdir)" \ + "prefix=$(prefix)" \ + "includedir=$(includedir)" \ + "AR=$(AR)" \ + "AS=$(AS)" \ + "LD=$(LD)" \ + "LIBCFLAGS=$(LIBCFLAGS)" \ + "NM=$(NM)" \ + "PICFLAG=$(PICFLAG)" \ + "RANLIB=$(RANLIB)" \ + "DESTDIR=$(DESTDIR)" + +MAKEOVERRIDES = + diff --git a/liboffloadmic/plugin/configure.ac b/liboffloadmic/plugin/configure.ac new file mode 100644 index 0000000..283faad --- 
/dev/null +++ b/liboffloadmic/plugin/configure.ac @@ -0,0 +1,135 @@ +# Plugin for offload execution on Intel MIC devices. +# +# Copyright (C) 2014 Free Software Foundation, Inc. +# +# Contributed by Andrey Turetskiy <andrey.turets...@intel.com>. +# +# This file is part of the GNU OpenMP Library (libgomp). +# +# Libgomp is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. + +# Process this file with autoconf to produce a configure script, like so: +# aclocal -I ../.. -I ../../config && autoconf && automake + +AC_PREREQ([2.64]) +AC_INIT([Intel MIC Offload Plugin], [1.0], ,[libgomp-plugin-intelmic]) + +AC_CONFIG_AUX_DIR(../..) + +AC_CANONICAL_SYSTEM +target_alias=${target_alias-$host_alias} +AC_SUBST(target_alias) + +AM_INIT_AUTOMAKE([1.9.0 foreign no-dist]) + +AM_MAINTAINER_MODE + +AC_PROG_CC +AC_PROG_CXX +AC_CONFIG_FILES([Makefile]) +AM_ENABLE_MULTILIB(, ../..) 
+ +if test "${multilib}" = "yes"; then + multilib_arg="--enable-multilib" +else + multilib_arg= +fi + +# Make sure liboffloadmic is enabled +case "$enable_liboffloadmic" in + host | target) + ;; + *) + AC_MSG_ERROR([Liboffloadmic is disabled]) ;; +esac +AM_CONDITIONAL(PLUGIN_HOST, [test x"$enable_liboffloadmic" = xhost]) + +# Get accel target and path to build or install tree of accel compiler +accel_search_dir= +accel_target= +if test x"$enable_liboffloadmic" = xhost; then + for accel in `echo $enable_offload_targets | sed -e 's#,# #g'`; do + accel_name=`echo $accel | sed 's/=.*//'` + accel_dir=`echo $accel | grep '=' | sed 's/.*=//'` + case "$accel_name" in + *-intelmic-* | *-intelmicemul-*) + accel_target=$accel_name + accel_search_dir=$accel_dir + ;; + esac + done + if test x"$accel_target" = x; then + AC_MSG_ERROR([--enable-offload-targets does not contain intelmic target]) + fi +fi +AC_SUBST(accel_search_dir) +AC_SUBST(accel_target) + +AC_MSG_CHECKING([for --enable-version-specific-runtime-libs]) +AC_ARG_ENABLE([version-specific-runtime-libs], + AC_HELP_STRING([--enable-version-specific-runtime-libs], + [Specify that runtime libraries should be installed in a compiler-specific directory]), + [case "$enableval" in + yes) enable_version_specific_runtime_libs=yes ;; + no) enable_version_specific_runtime_libs=no ;; + *) AC_MSG_ERROR([Unknown argument to enable/disable version-specific libs]);; + esac], + [enable_version_specific_runtime_libs=no]) +AC_MSG_RESULT($enable_version_specific_runtime_libs) + + +# Calculate toolexeclibdir. +# Also toolexecdir, though it's only used in toolexeclibdir. +case ${enable_version_specific_runtime_libs} in + yes) + # Need the gcc compiler version to know where to install libraries + # and header files if --enable-version-specific-runtime-libs option + # is selected. 
+ toolexecdir='$(libdir)/gcc/$(target_alias)' + toolexeclibdir='$(toolexecdir)/$(gcc_version)$(MULTISUBDIR)' + ;; + no) + if test -n "$with_cross_host" && + test x"$with_cross_host" != x"no"; then + # Install a library built with a cross compiler in tooldir, not libdir. + toolexecdir='$(exec_prefix)/$(target_alias)' + toolexeclibdir='$(toolexecdir)/lib' + else + toolexecdir='$(libdir)/gcc-lib/$(target_alias)' + toolexeclibdir='$(libdir)' + fi + multi_os_directory=`$CC -print-multi-os-directory` + case $multi_os_directory in + .) ;; # Avoid trailing /. + *) toolexeclibdir=$toolexeclibdir/$multi_os_directory ;; + esac + ;; +esac + +AC_LIBTOOL_DLOPEN +AM_PROG_LIBTOOL +# Forbid libtool to hardcode RPATH, because we want to be able to specify +# library search directory using LD_LIBRARY_PATH +hardcode_into_libs=no +AC_SUBST(toolexecdir) +AC_SUBST(toolexeclibdir) + +# Must be last +AC_OUTPUT diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp new file mode 100644 index 0000000..22d8625 --- /dev/null +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @@ -0,0 +1,448 @@ +/* Plugin for offload execution on Intel MIC devices. + + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Ilya Verbin <ilya.ver...@intel.com>. + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Host side part of a libgomp plugin. */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utility> +#include <vector> +#include <libgomp_target.h> +#include "compiler_if_host.h" +#include "main_target_image.h" + +#define LD_LIBRARY_PATH_ENV "LD_LIBRARY_PATH" +#define MIC_LD_LIBRARY_PATH_ENV "MIC_LD_LIBRARY_PATH" + +#ifdef DEBUG +#define TRACE(...) \ +{ \ +fprintf (stderr, "HOST:\t%s:%s ", __FILE__, __FUNCTION__); \ +fprintf (stderr, __VA_ARGS__); \ +fprintf (stderr, "\n"); \ +} +#else +#define TRACE { } +#endif + + +static VarDesc vd_host2tgt = { + { 1, 1 }, /* dst, src */ + { 1, 0 }, /* in, out */ + 1, /* alloc_if */ + 1, /* free_if */ + 4, /* align */ + 0, /* mic_offset */ + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, + is_stack_buf, sink_addr, alloc_disp, + is_noncont_src, is_noncont_dst */ + 0, /* offset */ + 0, /* size */ + 1, /* count */ + 0, /* alloc */ + 0, /* into */ + 0 /* ptr */ +}; + +static VarDesc vd_tgt2host = { + { 1, 1 }, /* dst, src */ + { 0, 1 }, /* in, out */ + 1, /* alloc_if */ + 1, /* free_if */ + 4, /* align */ + 0, /* mic_offset */ + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, + is_stack_buf, sink_addr, alloc_disp, + is_noncont_src, is_noncont_dst */ + 0, /* offset */ + 0, /* size */ + 1, /* count */ + 0, /* alloc */ + 0, /* into */ + 0 /* ptr */ +}; + + +/* Total number of shared libraries with offloading to Intel MIC. 
*/ +static int num_libraries; + +/* Pointers to the descriptors, containing pointers to host-side tables and to + target images. */ +static std::vector< std::pair<void *, void *> > lib_descrs; + +/* Thread-safe registration of the main image. */ +static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT; + + +/* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is + required by liboffloadmic. */ +__attribute__((constructor)) +static void +set_mic_lib_path (void) +{ + bool use_alloca; + const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV); + const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV); + char *mic_lib_path_new; + size_t len; + + if (!ld_lib_path) + return; + + len = (mic_lib_path ? strlen (mic_lib_path) : 0) + strlen (ld_lib_path) + 2; + use_alloca = len <= 2048; + + mic_lib_path_new = (char *) (use_alloca ? alloca (len) : malloc (len)); + if (!mic_lib_path_new) + { + fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); + exit (1); + } + + if (!mic_lib_path) + strcpy (mic_lib_path_new, ld_lib_path); + else + sprintf (mic_lib_path_new, "%s:%s", mic_lib_path, ld_lib_path); + setenv (MIC_LD_LIBRARY_PATH_ENV, mic_lib_path_new, 1); + + if (!use_alloca) + free (mic_lib_path_new); +} + +extern "C" enum offload_target_type +GOMP_OFFLOAD_get_type (void) +{ + enum offload_target_type res = OFFLOAD_TARGET_TYPE_INTEL_MIC; + TRACE ("(): return %d", res); + return res; +} + +extern "C" int +GOMP_OFFLOAD_get_num_devices (void) +{ + int res = _Offload_number_of_devices (); + TRACE ("(): return %d", res); + return res; +} + +/* This should be called from every shared library with offloading. 
*/ +extern "C" void +GOMP_OFFLOAD_register_image (void *host_table, void *target_image) +{ + TRACE ("(host_table = %p, target_image = %p)", host_table, target_image); + lib_descrs.push_back (std::make_pair (host_table, target_image)); + num_libraries++; +} + +static void +offload (const char *file, uint64_t line, int device, const char *name, + int num_vars, VarDesc *vars, VarDesc2 *vars2) +{ + OFFLOAD ofld = __offload_target_acquire1 (&device, file, line); + if (ofld) + __offload_offload1 (ofld, name, 0, num_vars, vars, vars2, 0, NULL, NULL); + else + { + fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line); + exit (1); + } +} + +static void +register_main_image () +{ + __offload_register_image (&main_target_image); +} + +/* Load offload_target_main on target. */ +extern "C" void +GOMP_OFFLOAD_init_device (int device) +{ + TRACE (""); + pthread_once (&main_image_is_registered, register_main_image); + offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0, + NULL, NULL); +} + +static void +get_target_table (int device, int &num_funcs, int &num_vars, void **&table) +{ + VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host }; + vd1[0].ptr = &num_funcs; + vd1[0].size = sizeof (num_funcs); + vd1[1].ptr = &num_vars; + vd1[1].size = sizeof (num_vars); + VarDesc2 vd1g[2] = { { "num_funcs", 0 }, { "num_vars", 0 } }; + + offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2, + vd1, vd1g); + + int table_size = num_funcs + 2 * num_vars; + if (table_size > 0) + { + table = new void * [table_size]; + + VarDesc vd2; + vd2 = vd_tgt2host; + vd2.ptr = table; + vd2.size = table_size * sizeof (void *); + VarDesc2 vd2g = { "table", 0 }; + + offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1, + &vd2, &vd2g); + } +} + +static void +load_lib_and_get_table (int device, int lib_num, mapping_table *&table, + int &table_size) +{ + struct TargetImage { + int64_t size; + /* 10 characters is enough for max int value. 
*/ + char name[sizeof ("lib0000000000.so")]; + char data[]; + } __attribute__ ((packed)); + + void ***host_table_descr = (void ***) lib_descrs[lib_num].first; + void **host_func_start = host_table_descr[0]; + void **host_func_end = host_table_descr[1]; + void **host_var_start = host_table_descr[2]; + void **host_var_end = host_table_descr[3]; + + void **target_image_descr = (void **) lib_descrs[lib_num].second; + void *image_start = target_image_descr[0]; + void *image_end = target_image_descr[1]; + + TRACE ("() host_table_descr { %p, %p, %p, %p }", host_func_start, + host_func_end, host_var_start, host_var_end); + TRACE ("() target_image_descr { %p, %p }", image_start, image_end); + + int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start; + TargetImage *image + = (TargetImage *) malloc (sizeof (int64_t) + sizeof ("lib0000000000.so") + + image_size); + if (!image) + { + fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); + exit (1); + } + + image->size = image_size; + sprintf (image->name, "lib%010d.so", lib_num); + memcpy (image->data, image_start, image->size); + + TRACE ("() __offload_register_image %s { %p, %d }", + image->name, image_start, image->size); + __offload_register_image (image); + + int tgt_num_funcs = 0; + int tgt_num_vars = 0; + void **tgt_table = NULL; + get_target_table (device, tgt_num_funcs, tgt_num_vars, tgt_table); + free (image); + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. 
*/ + int host_num_funcs = host_func_end - host_func_start; + int host_num_vars = (host_var_end - host_var_start) / 2; + TRACE ("() host_num_funcs = %d, tgt_num_funcs = %d", + host_num_funcs, tgt_num_funcs); + TRACE ("() host_num_vars = %d, tgt_num_vars = %d", + host_num_vars, tgt_num_vars); + if (host_num_funcs != tgt_num_funcs) + { + fprintf (stderr, "%s: Can't map target functions\n", __FILE__); + exit (1); + } + if (host_num_vars != tgt_num_vars) + { + fprintf (stderr, "%s: Can't map target variables\n", __FILE__); + exit (1); + } + + table = (mapping_table *) realloc (table, (table_size + host_num_funcs + + host_num_vars) + * sizeof (mapping_table)); + if (table == NULL) + { + fprintf (stderr, "%s: Can't allocate memory\n", __FILE__); + exit (1); + } + + for (int i = 0; i < host_num_funcs; i++) + { + mapping_table t; + t.host_start = (uintptr_t) host_func_start[i]; + t.host_end = t.host_start + 1; + t.tgt_start = (uintptr_t) tgt_table[i]; + t.tgt_end = t.tgt_start + 1; + + TRACE ("() lib %d, func %d:\t0x%llx -- 0x%llx", + lib_num, i, t.host_start, t.tgt_start); + + table[table_size++] = t; + } + + for (int i = 0; i < host_num_vars * 2; i += 2) + { + mapping_table t; + t.host_start = (uintptr_t) host_var_start[i]; + t.host_end = t.host_start + (uintptr_t) host_var_start[i+1]; + t.tgt_start = (uintptr_t) tgt_table[tgt_num_funcs+i]; + t.tgt_end = t.tgt_start + (uintptr_t) tgt_table[tgt_num_funcs+i+1]; + + TRACE ("() lib %d, var %d:\t0x%llx (%d) -- 0x%llx (%d)", lib_num, i/2, + t.host_start, t.host_end - t.host_start, + t.tgt_start, t.tgt_end - t.tgt_start); + + table[table_size++] = t; + } + + delete [] tgt_table; +} + +extern "C" int +GOMP_OFFLOAD_get_table (int device, void *result) +{ + TRACE ("(num_libraries = %d)", num_libraries); + + mapping_table *table = NULL; + int table_size = 0; + + for (int i = 0; i < num_libraries; i++) + load_lib_and_get_table (device, i, table, table_size); + + *(void **) result = table; + return table_size; +} + +extern "C" void * 
+GOMP_OFFLOAD_alloc (int device, size_t size) +{ + TRACE ("(size = %d)", size); + + void *tgt_ptr; + VarDesc vd1[2] = { vd_host2tgt, vd_tgt2host }; + vd1[0].ptr = &size; + vd1[0].size = sizeof (size); + vd1[1].ptr = &tgt_ptr; + vd1[1].size = sizeof (void *); + VarDesc2 vd1g[2] = { { "size", 0 }, { "tgt_ptr", 0 } }; + + offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd1, vd1g); + + return tgt_ptr; +} + +extern "C" void +GOMP_OFFLOAD_free (int device, void *tgt_ptr) +{ + TRACE ("(tgt_ptr = %p)", tgt_ptr); + + VarDesc vd1 = vd_host2tgt; + vd1.ptr = &tgt_ptr; + vd1.size = sizeof (void *); + VarDesc2 vd1g = { "tgt_ptr", 0 }; + + offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd1, &vd1g); +} + +extern "C" void * +GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr, + size_t size) +{ + TRACE ("(tgt_ptr = %p, host_ptr = %p, size = %d)", tgt_ptr, host_ptr, size); + if (!size) + return tgt_ptr; + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &tgt_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &size; + vd1[1].size = sizeof (size); + VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } }; + + offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2, + vd1, vd1g); + + VarDesc vd2 = vd_host2tgt; + vd2.ptr = (void *) host_ptr; + vd2.size = size; + VarDesc2 vd2g = { "var", 0 }; + + offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1, + &vd2, &vd2g); + + return tgt_ptr; +} + +extern "C" void * +GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr, + size_t size) +{ + TRACE ("(host_ptr = %p, tgt_ptr = %p, size = %d)", host_ptr, tgt_ptr, size); + if (!size) + return host_ptr; + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &tgt_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &size; + vd1[1].size = sizeof (size); + VarDesc2 vd1g[2] = { { "tgt_ptr", 0 }, { "size", 0 } }; + + offload (__FILE__, __LINE__, device, 
"__offload_target_tgt2host_p1", 2, + vd1, vd1g); + + VarDesc vd2 = vd_tgt2host; + vd2.ptr = (void *) host_ptr; + vd2.size = size; + VarDesc2 vd2g = { "var", 0 }; + + offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1, + &vd2, &vd2g); + + return host_ptr; +} + +extern "C" void +GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars) +{ + TRACE ("(tgt_fn = %p, tgt_vars = %p)", tgt_fn, tgt_vars); + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &tgt_fn; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &tgt_vars; + vd1[1].size = sizeof (void *); + VarDesc2 vd1g[2] = { { "tgt_fn", 0 }, { "tgt_vars", 0 } }; + + offload (__FILE__, __LINE__, device, "__offload_target_run", 2, vd1, vd1g); +} diff --git a/liboffloadmic/plugin/offload_target_main.cpp b/liboffloadmic/plugin/offload_target_main.cpp new file mode 100644 index 0000000..4a2778e --- /dev/null +++ b/liboffloadmic/plugin/offload_target_main.cpp @@ -0,0 +1,366 @@ +/* Plugin for offload execution on Intel MIC devices. + + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Ilya Verbin <ilya.ver...@intel.com>. + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Target side part of a libgomp plugin. */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include "compiler_if_target.h" + + +#ifdef DEBUG +#define TRACE(...) \ +{ \ +fprintf (stderr, "TARGET:\t%s:%s ", __FILE__, __FUNCTION__); \ +fprintf (stderr, __VA_ARGS__); \ +fprintf (stderr, "\n"); \ +} +#else +#define TRACE { } +#endif + + +static VarDesc vd_host2tgt = { + { 1, 1 }, /* dst, src */ + { 1, 0 }, /* in, out */ + 1, /* alloc_if */ + 1, /* free_if */ + 4, /* align */ + 0, /* mic_offset */ + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, + is_stack_buf, sink_addr, alloc_disp, + is_noncont_src, is_noncont_dst */ + 0, /* offset */ + 0, /* size */ + 1, /* count */ + 0, /* alloc */ + 0, /* into */ + 0 /* ptr */ +}; + +static VarDesc vd_tgt2host = { + { 1, 1 }, /* dst, src */ + { 0, 1 }, /* in, out */ + 1, /* alloc_if */ + 1, /* free_if */ + 4, /* align */ + 0, /* mic_offset */ + { 0, 0, 0, 0, 0, 0, 0, 0 }, /* is_static, is_static_dstn, has_length, + is_stack_buf, sink_addr, alloc_disp, + is_noncont_src, is_noncont_dst */ + 0, /* offset */ + 0, /* size */ + 1, /* count */ + 0, /* alloc */ + 0, /* into */ + 0 /* ptr */ +}; + +/* Pointer to the descriptor of the last loaded shared library. */ +static void *last_loaded_library = NULL; + +/* Pointer and size of the variable, used in __offload_target_host2tgt_p[12] + and __offload_target_tgt2host_p[12]. */ +static void *last_var_ptr = NULL; +static int last_var_size = 0; + + +/* Override the corresponding functions from libgomp. 
*/ +extern "C" int +omp_is_initial_device (void) __GOMP_NOTHROW +{ + return 0; +} + +extern "C" int32_t +omp_is_initial_device_ (void) +{ + return omp_is_initial_device (); +} + + +/* Dummy function needed for the initialization of target process during the + first call to __offload_offload1. */ +static void +__offload_target_init_proc (OFFLOAD ofldt) +{ + TRACE (""); +} + +/* Collect addresses of the offload functions and of the global variables from + the library descriptor and send them to host. + Part 1: Send num_funcs and num_vars to host. */ +static void +__offload_target_table_p1 (OFFLOAD ofldt) +{ + void ***lib_descr = (void ***) last_loaded_library; + + if (lib_descr == NULL) + { + TRACE (""); + fprintf (stderr, "Error! No shared libraries loaded on target.\n"); + return; + } + + void **func_table_begin = lib_descr[0]; + void **func_table_end = lib_descr[1]; + void **var_table_begin = lib_descr[2]; + void **var_table_end = lib_descr[3]; + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. */ + int num_funcs = func_table_end - func_table_begin; + int num_vars = (var_table_end - var_table_begin) / 2; + TRACE ("(num_funcs = %d, num_vars = %d)", num_funcs, num_vars); + + VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host }; + vd1[0].ptr = &num_funcs; + vd1[0].size = sizeof (num_funcs); + vd1[1].ptr = &num_vars; + vd1[1].size = sizeof (num_vars); + VarDesc2 vd2[2] = { { "num_funcs", 0 }, { "num_vars", 0 } }; + + __offload_target_enter (ofldt, 2, vd1, vd2); + __offload_target_leave (ofldt); +} + +/* Part 2: Send the table with addresses to host. 
*/ +static void +__offload_target_table_p2 (OFFLOAD ofldt) +{ + void ***lib_descr = (void ***) last_loaded_library; + void **func_table_begin = lib_descr[0]; + void **func_table_end = lib_descr[1]; + void **var_table_begin = lib_descr[2]; + void **var_table_end = lib_descr[3]; + + int num_funcs = func_table_end - func_table_begin; + int num_vars = (var_table_end - var_table_begin) / 2; + int table_size = (num_funcs + 2 * num_vars) * sizeof (void *); + void **table = (void **) malloc (table_size); + TRACE ("(table_size = %d)", table_size); + + VarDesc vd1; + vd1 = vd_tgt2host; + vd1.ptr = table; + vd1.size = table_size; + VarDesc2 vd2 = { "table", 0 }; + + __offload_target_enter (ofldt, 1, &vd1, &vd2); + + void **p; + int i = 0; + for (p = func_table_begin; p < func_table_end; p++, i++) + table[i] = *p; + + for (p = var_table_begin; p < var_table_end; p++, i++) + table[i] = *p; + + __offload_target_leave (ofldt); + free (table); +} + +/* Allocate size bytes and send a pointer to the allocated memory to host. */ +static void +__offload_target_alloc (OFFLOAD ofldt) +{ + size_t size = 0; + void *ptr = NULL; + + VarDesc vd1[2] = { vd_host2tgt, vd_tgt2host }; + vd1[0].ptr = &size; + vd1[0].size = sizeof (size); + vd1[1].ptr = &ptr; + vd1[1].size = sizeof (void *); + VarDesc2 vd2[2] = { { "size", 0 }, { "ptr", 0 } }; + + __offload_target_enter (ofldt, 2, vd1, vd2); + ptr = malloc (size); + TRACE ("(size = %d): ptr = %p", size, ptr); + __offload_target_leave (ofldt); +} + +/* Free the memory space pointed to by ptr. */ +static void +__offload_target_free (OFFLOAD ofldt) +{ + void *ptr = 0; + + VarDesc vd1 = vd_host2tgt; + vd1.ptr = &ptr; + vd1.size = sizeof (void *); + VarDesc2 vd2 = { "ptr", 0 }; + + __offload_target_enter (ofldt, 1, &vd1, &vd2); + TRACE ("(ptr = %p)", ptr); + free (ptr); + __offload_target_leave (ofldt); +} + +/* Receive var_size bytes from host and store to var_ptr. + Part 1: Receive var_ptr and var_size from host. 
*/ +static void +__offload_target_host2tgt_p1 (OFFLOAD ofldt) +{ + void *var_ptr = NULL; + size_t var_size = 0; + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &var_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &var_size; + vd1[1].size = sizeof (var_size); + VarDesc2 vd2[2] = { { "var_ptr", 0 }, { "var_size", 0 } }; + + __offload_target_enter (ofldt, 2, vd1, vd2); + TRACE ("(var_ptr = %p, var_size = %d)", var_ptr, var_size); + last_var_ptr = var_ptr; + last_var_size = var_size; + __offload_target_leave (ofldt); +} + +/* Part 2: Receive the data from host. */ +static void +__offload_target_host2tgt_p2 (OFFLOAD ofldt) +{ + TRACE ("(last_var_ptr = %p, last_var_size = %d)", + last_var_ptr, last_var_size); + + VarDesc vd1 = vd_host2tgt; + vd1.ptr = last_var_ptr; + vd1.size = last_var_size; + VarDesc2 vd2 = { "var", 0 }; + + __offload_target_enter (ofldt, 1, &vd1, &vd2); + __offload_target_leave (ofldt); +} + +/* Send var_size bytes from var_ptr to host. + Part 1: Receive var_ptr and var_size from host. */ +static void +__offload_target_tgt2host_p1 (OFFLOAD ofldt) +{ + void *var_ptr = NULL; + size_t var_size = 0; + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &var_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &var_size; + vd1[1].size = sizeof (var_size); + VarDesc2 vd2[2] = { { "var_ptr", 0 }, { "var_size", 0 } }; + + __offload_target_enter (ofldt, 2, vd1, vd2); + TRACE ("(var_ptr = %p, var_size = %d)", var_ptr, var_size); + last_var_ptr = var_ptr; + last_var_size = var_size; + __offload_target_leave (ofldt); +} + +/* Part 2: Send the data to host. 
*/ +static void +__offload_target_tgt2host_p2 (OFFLOAD ofldt) +{ + TRACE ("(last_var_ptr = %p, last_var_size = %d)", + last_var_ptr, last_var_size); + + VarDesc vd1 = vd_tgt2host; + vd1.ptr = last_var_ptr; + vd1.size = last_var_size; + VarDesc2 vd2 = { "var", 0 }; + + __offload_target_enter (ofldt, 1, &vd1, &vd2); + __offload_target_leave (ofldt); +} + +/* Call offload function by the address fn_ptr and pass vars_ptr to it. */ +static void +__offload_target_run (OFFLOAD ofldt) +{ + void *fn_ptr; + void *vars_ptr; + + VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt }; + vd1[0].ptr = &fn_ptr; + vd1[0].size = sizeof (void *); + vd1[1].ptr = &vars_ptr; + vd1[1].size = sizeof (void *); + VarDesc2 vd2[2] = { { "fn_ptr", 0 }, { "vars_ptr", 0 } }; + + __offload_target_enter (ofldt, 2, vd1, vd2); + TRACE ("(fn_ptr = %p, vars_ptr = %p)", fn_ptr, vars_ptr); + void (*fn)(void *) = (void (*)(void *)) fn_ptr; + fn (vars_ptr); + __offload_target_leave (ofldt); +} + + +/* This should be called from every library with offloading. */ +extern "C" void +target_register_lib (const void *target_table) +{ + TRACE ("(target_table = %p { %p, %p, %p, %p })", target_table, + ((void **) target_table)[0], ((void **) target_table)[1], + ((void **) target_table)[2], ((void **) target_table)[3]); + + last_loaded_library = (void *) target_table; +} + +/* Use __offload_target_main from liboffload. */ +int +main (int argc, char **argv) +{ + __offload_target_main (); + return 0; +} + + +/* Register offload_target_main's functions in the liboffload. 
*/ + +struct Entry { + const char *name; + void *func; +}; + +#define REGISTER(f) \ +extern "C" const Entry __offload_target_##f##_$entry \ +__attribute__ ((section(".OffloadEntryTable."))) = { \ + "__offload_target_"#f, \ + (void *) __offload_target_##f \ +} +REGISTER (init_proc); +REGISTER (table_p1); +REGISTER (table_p2); +REGISTER (alloc); +REGISTER (free); +REGISTER (host2tgt_p1); +REGISTER (host2tgt_p2); +REGISTER (tgt2host_p1); +REGISTER (tgt2host_p2); +REGISTER (run); +#undef REGISTER -- 1.7.1