This patch does the following two things: 1) Enable GOMP_MAP_FIRSTPRIVATE_INT in OpenaCC. 2) Extends the 'INT' values to handle floats and doubles via type casting.
OpenACC handles OMP_CLAUSE_FIRSTPRIVATE slightly different to OpenMP; lower_omp_target changes it to a data map clause. Consequently, it utilizes a different code path from OpenMP. The first part of this patch is to enable GOMP_MAP_FIRSTPRIVATE_INT in OpenACC. The second part involved extending GOMP_MAP_FIRSTPRIVATE_INT to support floating point values. The scientific applications that are using OpenACC tend to utilize a lot of floating point values. I'm not sure if there are any repercussions/side-effects this form of casting. If you take a close look at lower_omp_target, you'll notice that I'm gave reference types special treatment. Specifically, I disabled this optimization on non-INTEGER_TYPE and floating point values, because the nvptx target was having some problems dereferencing boolean-typed pointers. That's something I have on my TODO list to track down later. As for the performance gains, this optimization resulted in a non-trivial speedup in CloverLeaf running on a Nvidia Pascal board. CloverLeaf is somewhat special in that it consists of a lot of OpenACC offloaded regions which gets called multiple times throughout its execution. Consequently, it is I/O limited. The other benchmarks I ran didn't benefit nearly as much as CloverLeaf. I chose a small data set for CloverLeaf that only ran in 1.3s without the patch, and hence make it even more I/O limited. After the patch, it ran 0.35s faster. This patch has been applied to gomp-4_0-branch. Cesar
2017-01-27 Cesar Philippidis <ce...@codesourcery.com> gcc/ * omp-low.c (maybe_lookup_field_in_outer_ctx): New function. (convert_to_firstprivate_pointer): New function. (convert_from_firstprivate_pointer): New function. (lower_omp_target): Enable GOMP_MAP_FIRSTPRIVATE_INT in OpenACC. libgomp/ * plugin/plugin-nvptx.c (nvptx_exec): Make aware of GOMP_MAP_FIRSTPRIVATE_INT host addresses. * testsuite/libgomp.oacc-c++/firstprivate-int.C: New test. * testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c: New test. * testsuite/libgomp.oacc-fortran/firstprivate-int.f90: New test. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 017f02a..adde8de 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -4359,6 +4359,19 @@ maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) return t ? t : decl; } +/* Returns true if DECL is present inside a field that encloses CTX. */ + +static bool +maybe_lookup_field_in_outer_ctx (tree decl, omp_context *ctx) +{ + omp_context *up; + + for (up = ctx->outer; up; up = up->outer) + if (maybe_lookup_field (decl, up)) + return true; + + return false; +} /* Construct the initialization value for reduction operation OP. */ @@ -16439,6 +16452,98 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) } } +/* Helper function for lower_omp_target. Converts VAR to something + that can be represented by a POINTER_SIZED_INT_NODE. Any new + instructions are appended to GS. This is primarily used to + optimize firstprivate variables, so that small types (less + precision than POINTER_SIZE) do not require additional data + mappings. */ + +static tree +convert_to_firstprivate_pointer (tree var, gimple_seq *gs) +{ + tree type = TREE_TYPE (var), new_type = NULL_TREE; + tree tmp = NULL_TREE; + + if (is_reference (var)) + type = TREE_TYPE (type); + + if (INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + { + if (is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + return fold_convert (pointer_sized_int_node, var); + } + + switch (TYPE_PRECISION (type)) + { + case 1: case 2: case 4: case 8: new_type = unsigned_char_type_node; break; + case 16: new_type = short_unsigned_type_node; break; + case 32: new_type = unsigned_type_node; break; + case 64: new_type = long_unsigned_type_node; break; + default: gcc_unreachable (); + } + + if (is_reference (var)) + { + tmp = create_tmp_var (type); + gimplify_assign (tmp, build_simple_mem_ref (var), gs); + var = tmp; + } + + tmp = create_tmp_var (new_type); + var = fold_build1 (VIEW_CONVERT_EXPR, new_type, var); + gimplify_assign (tmp, var, gs); + var = fold_convert (pointer_sized_int_node, tmp); + + return var; +} + +/* Like convert_to_firstprivate_pointer, but restore the original type. */ + +static tree +convert_from_firstprivate_pointer (tree var, bool is_ref, gimple_seq *gs) +{ + tree type = TREE_TYPE (var); + tree new_type = NULL_TREE; + tree tmp = NULL_TREE; + tree inner_type = NULL_TREE; + + gcc_assert (TREE_CODE (var) == MEM_REF); + var = TREE_OPERAND (var, 0); + + if (INTEGRAL_TYPE_P (var) || POINTER_TYPE_P (type)) + return fold_convert (type, var); + + switch (TYPE_PRECISION (type)) + { + case 1: case 2: case 4: case 8: new_type = unsigned_char_type_node; break; + case 16: new_type = short_unsigned_type_node; break; + case 32: new_type = unsigned_type_node; break; + case 64: new_type = long_unsigned_type_node; break; + default: gcc_unreachable (); + } + + tmp = create_tmp_var (new_type); + var = fold_convert (new_type, var); + gimplify_assign (tmp, var, gs); + var = fold_build1 (VIEW_CONVERT_EXPR, type, tmp); + + if (is_ref) + { + tmp = create_tmp_var (build_pointer_type (type)); + gimplify_assign (tmp, build_fold_addr_expr (var), gs); + var = tmp; + } + + return var; +} + /* Lower the GIMPLE_OMP_TARGET in the current statement in GSI_P. CTX holds context information for the directive. */ @@ -16611,6 +16716,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) && OMP_CLAUSE_MAP_IN_REDUCTION (c))) { tree var_type = TREE_TYPE (var); + tree new_var = lookup_decl (var, ctx); + bool oacc_firstprivate_int = false; + tree inner_type = is_reference (new_var) + ? TREE_TYPE (var_type) : var_type; bool rcv_by_ref = (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && GOMP_MAP_DYNAMIC_ARRAY_P (OMP_CLAUSE_MAP_KIND (c)) @@ -16618,24 +16727,35 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) ? false : true); x = build_receiver_ref (var, rcv_by_ref, ctx); - tree new_var = lookup_decl (var, ctx); + + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE + && (TREE_CODE (inner_type) == REAL_TYPE + || (!is_reference (var) && INTEGRAL_TYPE_P (inner_type)) + || TREE_CODE (inner_type) == INTEGER_TYPE) + && TYPE_PRECISION (inner_type) <= POINTER_SIZE + && TYPE_PRECISION (inner_type) != 0 + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + oacc_firstprivate_int = true; if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) - && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE) + && TREE_CODE (var_type) == ARRAY_TYPE + && !oacc_firstprivate_int) x = build_simple_mem_ref (x); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (is_reference (new_var) - && TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE) + if (oacc_firstprivate_int) + x = convert_from_firstprivate_pointer (x, is_reference (var), + &fplist); + else if (is_reference (new_var) + && TREE_CODE (var_type) != POINTER_TYPE) { /* Create a local object to hold the instance value. */ - tree type = TREE_TYPE (TREE_TYPE (new_var)); const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var)); - tree inst = create_tmp_var (type, id); + tree inst = create_tmp_var (TREE_TYPE (var_type), id); gimplify_assign (inst, fold_indirect_ref (x), &fplist); x = build_fold_addr_expr (inst); } @@ -16790,6 +16910,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree ovar, nc, s, purpose, var, x, type; unsigned int talign; + bool oacc_firstprivate_int; default: break; @@ -16798,6 +16919,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) case OMP_CLAUSE_TO: case OMP_CLAUSE_FROM: oacc_firstprivate_map: + oacc_firstprivate_int = false; nc = c; ovar = OMP_CLAUSE_DECL (c); if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP @@ -16882,8 +17004,25 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { + tree type = TREE_TYPE (var); + tree inner_type = is_reference (var) + ? TREE_TYPE (type) : type; gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!is_reference (var)) + if ((TREE_CODE (inner_type) == REAL_TYPE + || (!is_reference (var) + && INTEGRAL_TYPE_P (inner_type)) + || TREE_CODE (inner_type) == INTEGER_TYPE) + && TYPE_PRECISION (inner_type) <= POINTER_SIZE + && TYPE_PRECISION (inner_type) != 0 + && !maybe_lookup_field_in_outer_ctx (var, ctx)) + { + oacc_firstprivate_int = true; + if (is_gimple_reg (var) + && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) + TREE_NO_WARNING (var) = 1; + var = convert_to_firstprivate_pointer (var, &ilist); + } + else if (!is_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -16935,10 +17074,15 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - s = TREE_TYPE (ovar); - if (TREE_CODE (s) == REFERENCE_TYPE) - s = TREE_TYPE (s); - s = TYPE_SIZE_UNIT (s); + if (oacc_firstprivate_int) + s = size_int (0); + else + { + s = TREE_TYPE (ovar); + if (TREE_CODE (s) == REFERENCE_TYPE) + s = TREE_TYPE (s); + s = TYPE_SIZE_UNIT (s); + } } else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP && (OMP_CLAUSE_MAP_KIND (c) & GOMP_MAP_DYNAMIC_ARRAY)) @@ -16997,7 +17141,10 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) break; case OMP_CLAUSE_FIRSTPRIVATE: gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt)); - tkind = GOMP_MAP_TO; + if (oacc_firstprivate_int) + tkind = GOMP_MAP_FIRSTPRIVATE_INT; + else + tkind = GOMP_MAP_TO; tkind_zero = tkind; break; case OMP_CLAUSE_TO: diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index c435012..a05b399 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1018,7 +1018,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, /* Copy the array of arguments to the mapped page. */ hp = alloca(sizeof(void *) * mapnum); for (i = 0; i < mapnum; i++) - ((void **) hp)[i] = devaddrs[i]; + ((void **) hp)[i] = devaddrs[i] != 0 ? devaddrs[i] : hostaddrs[i]; /* Copy the (device) pointers to arguments to the device (dp and hp might in fact have the same value on a unified-memory system). */ diff --git a/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C new file mode 100644 index 0000000..94667b5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c++/firstprivate-int.C @@ -0,0 +1,83 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. + This test is similer to the test in libgomp.oacc-c-c++-common, but + it focuses on reference types. */ + +#include <assert.h> +#include <stdint.h> +#include <complex.h> + +void test_ref (int8_t &i8i, int8_t &i8o, int16_t &i16i, int16_t &i16o, + int32_t &i32i, int32_t &i32o, int64_t &i64i, int64_t &i64o, + uint8_t &u8i, uint8_t &u8o, uint16_t &u16i, uint16_t &u16o, + uint32_t &u32i, uint32_t &u32o, uint64_t &u64i, uint64_t &u64o, + float &r32i, float &r32o, double &r64i, double &r64o, + int _Complex &cii, int _Complex &cio, + float _Complex &cfi, float _Complex &cfo, + double _Complex &cdi, double _Complex &cdo) +{ +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } +} + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + + test_ref (i8i, i8o, i16i, i16o, i32i, i32o, i64i, i64o, u8i, u8o, u16i, + u16o, u32i, u32o, u64i, u64o, r32i, r32o, r64i, r64o, cii, cio, + cfi, cfo, cdi, cdo); + + assert(i8o = i8i); + assert(i16o = i16i); + assert(i32o = i32i); + assert(i64o = i64i); + + assert(u8o = u8i); + assert(u16o = u16i); + assert(u32o = u32i); + assert(u64o = u64i); + + assert(r32o = r32i); + assert(r64o = r64i); + + assert(cio = cii); + assert(cfo = cfi); + assert(cdo = cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c new file mode 100644 index 0000000..d9da9a0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-int.c @@ -0,0 +1,67 @@ +/* Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. */ + +#include <assert.h> +#include <stdint.h> +#include <complex.h> + +int +main () +{ + int8_t i8i = -1, i8o; + int16_t i16i = -2, i16o; + int32_t i32i = -3, i32o; + int64_t i64i = -4, i64o; + + uint8_t u8i = 1, u8o; + uint16_t u16i = 2, u16o; + uint32_t u32i = 3, u32o; + uint64_t u64i = 4, u64o; + + float r32i = .5, r32o; + double r64i = .25, r64o; + + int _Complex cii = 2, cio; + float _Complex cfi = 4, cfo; + double _Complex cdi = 8, cdo; + +#pragma acc parallel firstprivate (i8i,i16i,i32i,i64i,u8i,u16i,u32i,u64i) \ + firstprivate(r32i,r64i,cii,cfi,cdi) copyout(i8o,i16o,i32o,i64o) \ + copyout(u8o,u16o,u32o,u64o,r32o,r64o,cio,cfo,cdo) num_gangs(1) + { + i8o = i8i; + i16o = i16i; + i32o = i32i; + i64o = i64i; + + u8o = u8i; + u16o = u16i; + u32o = u32i; + u64o = u64i; + + r32o = r32i; + r64o = r64i; + + cio = cii; + cfo = cfi; + cdo = cdi; + } + + assert(i8o = i8i); + assert(i16o = i16i); + assert(i32o = i32i); + assert(i64o = i64i); + + assert(u8o = u8i); + assert(u16o = u16i); + assert(u32o = u32i); + assert(u64o = u64i); + + assert(r32o = r32i); + assert(r64o = r64i); + + assert(cio = cii); + assert(cfo = cfi); + assert(cdo = cdi); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 new file mode 100644 index 0000000..3408d3d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-int.f90 @@ -0,0 +1,203 @@ +! Verify the GOMP_MAP_FIRSTPRIVATE_INT optimziation on various types. + +program test + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = 1 + i2i = 2 + i4i = 3 + i8i = 4 + i16i = 5 + + l1i = .true. + l2i = .false. + l4i = .true. + l8i = .true. + l16i = .false. + + r4i = .5 + r8i = .25 + + c4i = (2, -2) + c8i = (4, -4) + + ch1i = "a" + ch4i = "b" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) call abort + if (i2i /= i2o) call abort + if (i4i /= i4o) call abort + if (i8i /= i8o) call abort + if (i16i /= i16o) call abort + + if (l1i .neqv. l1o) call abort + if (l2i .neqv. l2o) call abort + if (l4i .neqv. l4o) call abort + if (l8i .neqv. l8o) call abort + if (l16i .neqv. l16o) call abort + + if (r4i /= r4o) call abort + if (r8i /= r8o) call abort + + if (c4i /= c4o) call abort + if (c8i /= c8o) call abort + + if (ch1i /= ch1o) call abort + if (ch4i /= ch4o) call abort + + call subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) +end program test + +subroutine subtest(i1i, i2i, i4i, i8i, i16i, i1o, i2o, i4o, i8o, i16o, & + l1i, l2i, l4i, l8i, l16i, l1o, l2o, l4o, l8o, l16o, & + r4i, r8i, r4o, r8o, c4i, c8i, c4o, c8o, & + ch1i, ch4i, ch1o, ch4o) + implicit none + + integer (kind=1) :: i1i, i1o + integer (kind=2) :: i2i, i2o + integer (kind=4) :: i4i, i4o + integer (kind=8) :: i8i, i8o + integer (kind=16) :: i16i, i16o + + logical (kind=1) :: l1i, l1o + logical (kind=2) :: l2i, l2o + logical (kind=4) :: l4i, l4o + logical (kind=8) :: l8i, l8o + logical (kind=16) :: l16i, l16o + + real (kind=4) :: r4i, r4o + real (kind=8) :: r8i, r8o + + complex (kind=4) :: c4i, c4o + complex (kind=8) :: c8i, c8o + + character (kind=1) :: ch1i, ch1o + character (kind=4) :: ch4i, ch4o + + i1i = -i1i + i2i = -i2i + i4i = -i4i + i8i = -i8i + i16i = -i16i + + l1i = .not. l1i + l2i = .not. l2i + l4i = .not. l4i + l8i = .not. l8i + l16i = .not. l16i + + r4i = -r4i + r8i = -r8i + + c4i = -c4i + c8i = -c8i + + ch1i = "z" + ch4i = "y" + + !$acc parallel firstprivate(i1i, i2i, i4i, i8i, i16i) & + !$acc copyout(i1o, i2o, i4o, i8o, i16o) & + !$acc firstprivate(l1i, l2i, l4i, l8i, l16i) & + !$acc copyout(l1o, l2o, l4o, l8o, l16o) & + !$acc firstprivate(r4i, r8i) copyout(r4o, r8o) & + !$acc firstprivate(c4i, c8i) copyout(c4o, c8o) & + !$acc firstprivate(ch1i, ch4i) & + !$acc copyout(ch1o, ch4o) + i1o = i1i + i2o = i2i + i4o = i4i + i8o = i8i + i16o = i16i + + l1o = l1i + l2o = l2i + l4o = l4i + l8o = l8i + l16o = l16i + + r4o = r4i + r8o = r8i + + c4o = c4i + c8o = c8i + + ch1o = ch1i + ch4o = ch4i + !$acc end parallel + + if (i1i /= i1o) call abort + if (i2i /= i2o) call abort + if (i4i /= i4o) call abort + if (i8i /= i8o) call abort + if (i16i /= i16o) call abort + + if (l1i .neqv. l1o) call abort + if (l2i .neqv. l2o) call abort + if (l4i .neqv. l4o) call abort + if (l8i .neqv. l8o) call abort + if (l16i .neqv. l16o) call abort + + if (r4i /= r4o) call abort + if (r8i /= r8o) call abort + + if (c4i /= c4o) call abort + if (c8i /= c8o) call abort + + if (ch1i /= ch1o) call abort + if (ch4i /= ch4o) call abort +end subroutine subtest