Hi, On 2023-10-19 06:20:26 +1300, Thomas Munro wrote: > Interestingly, a new problem just showed up on the RHEL9 s390x > machine "lora", where a previously reported problem [1] apparently > re-appeared. It complains about incompatible layout, previously > blamed on a mismatch between clang and LLVM versions.
I've attached a patch revision that I spent the last couple hours working on. It's very very roughly based on a patch Tom Stellard had written (which I think a few rpm packages use). But instead of encoding specific layout details, I made the code check if the data layout works and fall back to the cpu / features used for llvmjit_types.bc. This way it's not s390x specific, future odd architecture behaviour would "automatically" be handled the same. With that at least the main regression tests pass on s390x, even with jit_above_cost=0. > I can see that its clang is v15 from clues in the config log, but I don't > know which version of LLVM is being used. However, I see now that > --with-llvm was literally just turned on, so there is no reason to think > that this would have worked before or this work is relevant. Strange though > -- we must be able to JIT further than that on s390x because we have crash > reports in other threads (ie we made it past this and into other more > advanced brokenness). You can avoid the borkedness by a) running on an older cpu or b) adding compilation flags to change the code generation target (e.g. -march=native). And some RPM packages have applied the patch by Tom Stellard. > [1] > https://www.postgresql.org/message-id/flat/20210319190047.7o4bwhbp5dzkqif3%40alap3.anarazel.de#ec51b488ca8eac8c603d91c0439d38b2 Greetings, Andres Freund
>From e7ab9eb11576ef85e4d2f6e1ade0a6028279634d Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Fri, 20 Oct 2023 23:03:53 -0700 Subject: [PATCH v2] jit: Add fallback in case of runtime/compile time ABI mismatch LLVM's s390x target uses a different ABI (called data layout in LLVM) for z13 and newer processors. If IR files (like llvmjit_types.bc) are compiled to target a processor older than z13, which is the default, and JIT occurs on a z13 or newer processor, the ABI mismatch will cause JIT to fail at runtime. To deal with that, check if data layouts match during JIT initialization. If the runtime detected cpu / features result in a different layout, try if the cpu/features recorded in in llvmjit_types.bc work. Author: Andres Freund <and...@anarazel.de> Author: Tom Stellard <tstel...@redhat.com> (in an older version) Discussion: 16971-5d004d34742a3...@postgresql.org Discussion: ca+hukg+hoperbgvws4je3uy1uo3trnhf08hmispmrpodgti...@mail.gmail.com --- src/backend/jit/llvm/llvmjit.c | 191 ++++++++++++++++++++++++++++----- 1 file changed, 166 insertions(+), 25 deletions(-) diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c index 58f638859a4..fa2a982991d 100644 --- a/src/backend/jit/llvm/llvmjit.c +++ b/src/backend/jit/llvm/llvmjit.c @@ -101,6 +101,8 @@ static size_t llvm_jit_context_in_use_count = 0; static size_t llvm_llvm_context_reuse_count = 0; static const char *llvm_triple = NULL; static const char *llvm_layout = NULL; +static char *llvm_cpu = NULL; +static char *llvm_cpu_features = NULL; static LLVMContextRef llvm_context; @@ -123,6 +125,7 @@ static void llvm_optimize_module(LLVMJitContext *context, LLVMModuleRef module); static void llvm_create_types(void); static void llvm_set_target(void); +static void llvm_set_cpu_and_features(void); static void llvm_recreate_llvm_context(void); static uint64_t llvm_resolve_symbol(const char *name, void *ctx); @@ -884,9 +887,6 @@ static void llvm_session_initialize(void) { 
MemoryContext oldcontext; - char *error = NULL; - char *cpu = NULL; - char *features = NULL; LLVMTargetMachineRef opt0_tm; LLVMTargetMachineRef opt3_tm; @@ -931,38 +931,21 @@ llvm_session_initialize(void) */ llvm_set_target(); - if (LLVMGetTargetFromTriple(llvm_triple, &llvm_targetref, &error) != 0) - { - elog(FATAL, "failed to query triple %s", error); - } - - /* - * We want the generated code to use all available features. Therefore - * grab the host CPU string and detect features of the current CPU. The - * latter is needed because some CPU architectures default to enabling - * features not all CPUs have (weird, huh). - */ - cpu = LLVMGetHostCPUName(); - features = LLVMGetHostCPUFeatures(); - elog(DEBUG2, "LLVMJIT detected CPU \"%s\", with features \"%s\"", - cpu, features); + llvm_set_cpu_and_features(); opt0_tm = - LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features, + LLVMCreateTargetMachine(llvm_targetref, llvm_triple, + llvm_cpu, llvm_cpu_features, LLVMCodeGenLevelNone, LLVMRelocDefault, LLVMCodeModelJITDefault); opt3_tm = - LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features, + LLVMCreateTargetMachine(llvm_targetref, llvm_triple, + llvm_cpu, llvm_cpu_features, LLVMCodeGenLevelAggressive, LLVMRelocDefault, LLVMCodeModelJITDefault); - LLVMDisposeMessage(cpu); - cpu = NULL; - LLVMDisposeMessage(features); - features = NULL; - /* force symbols in main binary to be loaded */ LLVMLoadLibraryPermanently(NULL); @@ -1092,6 +1075,73 @@ load_return_type(LLVMModuleRef mod, const char *name) return typ; } +/* + * Copies a string that needs to be freed with LLVMDisposeMessage() and then + * frees the source string. + */ +static char * +llvm_to_pg_str(char *str) +{ + char *ret = pstrdup(str); + + LLVMDisposeMessage(str); + + return ret; +} + +/* + * Return data layout for a target machine created with cpu and features + * + * The return value is a palloc'd string. 
+ */ +static char * +determine_data_layout(const char *cpu, const char *features) +{ + LLVMTargetMachineRef tm; + LLVMTargetDataRef layout; + char *layout_str; + + tm = LLVMCreateTargetMachine(llvm_targetref, llvm_triple, cpu, features, + LLVMCodeGenLevelNone, + LLVMRelocDefault, + LLVMCodeModelJITDefault); + layout = LLVMCreateTargetDataLayout(tm); + layout_str = LLVMCopyStringRepOfTargetData(layout); + + LLVMDisposeTargetData(layout); + LLVMDisposeTargetMachine(tm); + + return llvm_to_pg_str(layout_str); +} + +/* + * Convenience wrapper around LLVMGetStringAttributeAtIndex & + * LLVMGetStringAttributeValue. + * + * The return value is a zero-terminated, palloc'd string. + */ +static char * +get_string_attribute_value(LLVMValueRef v, uint32 index, + const char *name, const char *fallback) +{ + LLVMAttributeRef attr; + const char *val; + unsigned len; + + attr = LLVMGetStringAttributeAtIndex(v, index, name, strlen(name)); + if (!attr) + return fallback ? pstrdup(fallback) : NULL; + + val = LLVMGetStringAttributeValue(attr, &len); + + /* + * LLVMGetStringAttributeValue() returns values not zero terminated, which + * inconvenient to work with. Also has the advantage that the return value + * is freed by memory context cleanup etc. + */ + return psprintf("%.*s", len, val); +} + /* * Load triple & layout from clang emitted file so we're guaranteed to be * compatible. 
@@ -1099,14 +1149,105 @@ load_return_type(LLVMModuleRef mod, const char *name) static void llvm_set_target(void) { + char *error = NULL; + if (!llvm_types_module) elog(ERROR, "failed to extract target information, llvmjit_types.c not loaded"); + /* can get called again after partial initialization */ + if (llvm_triple == NULL) llvm_triple = pstrdup(LLVMGetTarget(llvm_types_module)); if (llvm_layout == NULL) llvm_layout = pstrdup(LLVMGetDataLayoutStr(llvm_types_module)); + + if (llvm_targetref == NULL) + { + if (LLVMGetTargetFromTriple(llvm_triple, &llvm_targetref, &error) != 0) + elog(FATAL, "failed to query triple %s", error); + } +} + +/* + * Determine CPU and features to use for JIT. + * + * We want the generated code to use all available features. Therefore + * grab the host CPU string and detect features of the current CPU. The + * latter is needed because some CPU architectures default to enabling + * features not all CPUs have (weird, huh). + * + * Unfortunately there is at least one architecture on which LLVM doesn't play + * fair - on s390, LLVM will use a different ABI for the same triple, + * depending on host CPU (IMO not a sane decision, but ...). To work around + * that, if the layout of llvmjit_types.bc does not match what we get using + * the host cpu / features, try target-cpu/target-features that clang recorded + * in llvmjit_types.bc at compile time. 
+ */ +static void +llvm_set_cpu_and_features(void) +{ + char *host_cpu; + char *host_cpu_features; + char *host_layout_str; + + /* can get called again after partial initialization */ + if (llvm_cpu != NULL) + return; + + /* determine runtime CPU / feature */ + host_cpu = llvm_to_pg_str(LLVMGetHostCPUName()); + host_cpu_features = llvm_to_pg_str(LLVMGetHostCPUFeatures()); + host_layout_str = determine_data_layout(host_cpu, host_cpu_features); + + elog(DEBUG2, "detected CPU \"%s\", with features \"%s\", resulting in layout \"%s\"", + host_cpu, host_cpu_features, host_layout_str); + + /* check if we can use detected values or if we need to fall back */ + if (strcmp(host_layout_str, llvm_layout) == 0) + { + llvm_cpu = host_cpu; + llvm_cpu_features = host_cpu_features; + pfree(host_layout_str); + } + else + { + char *module_cpu; + char *module_cpu_features; + char *module_layout_str; + + /* incompatible, try to fall back to module cpu / features */ + + module_cpu = get_string_attribute_value(AttributeTemplate, + LLVMAttributeFunctionIndex, + "target-cpu", "generic"); + module_cpu_features = get_string_attribute_value(AttributeTemplate, + LLVMAttributeFunctionIndex, + "target-features", ""); + module_layout_str = determine_data_layout(module_cpu, module_cpu_features); + + if (strcmp(module_layout_str, llvm_layout) != 0) + { + /* leaking a few strings, this isn't expected to ever be hit */ + ereport(ERROR, + errmsg_internal("could not determine working CPU / feature comination for JIT compilation"), + errdetail_internal("compile time data layout: \"%s\", host layout \"%s\", fallback layout \"%s\"", + llvm_layout, host_layout_str, module_layout_str)); + } + + llvm_cpu = module_cpu; + llvm_cpu_features = module_cpu_features; + + ereport(DEBUG2, + errmsg_internal("detected CPU / features yield incompatible data layout, using values from module instead"), + errdetail_internal("module CPU \"%s\", features \"%s\", resulting in layout \"%s\"", + module_cpu, module_cpu_features, 
module_layout_str)); + + pfree(host_cpu); + pfree(host_cpu_features); + pfree(host_layout_str); + pfree(module_layout_str); + } } /* -- 2.38.0