This is an automated email from the ASF dual-hosted git repository.
hgruszecki pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iggy.git
The following commit(s) were added to refs/heads/master by this push:
new 484f98a59 feat(server): add io_uring EINVAL diagnostics for shard
executors (#3021)
484f98a59 is described below
commit 484f98a5989764ffc28a5e863d753f6b6e9b2954
Author: Hubert Gruszecki <[email protected]>
AuthorDate: Tue Mar 24 12:56:02 2026 +0100
feat(server): add io_uring EINVAL diagnostics for shard executors (#3021)
---
Cargo.toml | 2 +-
core/server/src/bootstrap.rs | 6 +-
core/server/src/diagnostics.rs | 226 +++++++++++++++++++++++++++++++++++++++++
core/server/src/main.rs | 28 ++++-
4 files changed, 257 insertions(+), 5 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index bdd7d84a1..6ac198684 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -184,7 +184,7 @@ mime_guess = "2.0"
mockall = "0.14.0"
moka = { version = "0.12.14", features = ["future"] }
mongodb = { version = "3.5.2", features = ["rustls-tls"] }
-nix = { version = "0.31.2", features = ["fs", "resource", "sched"] }
+nix = { version = "0.31.2", features = ["feature", "fs", "resource", "sched"] }
nonzero_lit = "0.1.2"
notify = "8.2.0"
octocrab = "0.49.5"
diff --git a/core/server/src/bootstrap.rs b/core/server/src/bootstrap.rs
index a874fd068..3a5661bce 100644
--- a/core/server/src/bootstrap.rs
+++ b/core/server/src/bootstrap.rs
@@ -165,7 +165,10 @@ pub fn create_root_user() -> User {
User::root(&username, &password)
}
-pub fn create_shard_executor() -> Runtime {
+// Shard executors require IORING_SETUP_COOP_TASKRUN for predictable latency.
+// Falling back to default flags would silently degrade shard performance -
+// do not add a retry with reduced flags here.
+pub fn create_shard_executor() -> Result<Runtime, std::io::Error> {
// TODO: The event interval tick, could be configured based on the fact
// How many clients we expect to have connected.
// This roughly estimates the number of tasks we will create.
@@ -186,7 +189,6 @@ pub fn create_shard_executor() -> Runtime {
.with_proactor(proactor.to_owned())
.event_interval(128)
.build()
- .unwrap()
}
pub fn resolve_persister(enforce_fsync: bool) -> Arc<PersisterKind> {
diff --git a/core/server/src/diagnostics.rs b/core/server/src/diagnostics.rs
index 6029cbf88..32a19ea3b 100644
--- a/core/server/src/diagnostics.rs
+++ b/core/server/src/diagnostics.rs
@@ -16,6 +16,15 @@
* under the License.
*/
+// Invite link printed at the end of each diagnostic report in this file.
+#[cfg(target_os = "linux")]
+const DISCORD_SUPPORT_URL: &str = "https://discord.gg/apache-iggy";
+
+// Writes a one-line pointer to the Discord server to stderr, followed by a
+// blank line that separates it from whatever is printed next.
+#[cfg(target_os = "linux")]
+fn print_discord_link() {
+ eprintln!(" Need help? Join our Discord: {DISCORD_SUPPORT_URL}");
+ eprintln!();
+}
+
/// Prints information about locked memory limits when runtime creation fails.
/// This is typically needed when io_uring cannot allocate memory due to RLIMIT_MEMLOCK.
#[cfg(target_os = "linux")]
@@ -71,6 +80,7 @@ pub fn print_locked_memory_limit_info() {
eprintln!(" 5. For systemd services (add to service file):");
eprintln!(" LimitMEMLOCK=infinity");
eprintln!();
+ print_discord_link();
}
/// Prints information about io_uring permission issues in containerized environments.
@@ -101,6 +111,180 @@ pub fn print_io_uring_permission_info() {
eprintln!(" seccompProfile:");
eprintln!(" type: Unconfined");
eprintln!();
+ print_discord_link();
+}
+
+/// Minimum kernel version for IORING_SETUP_COOP_TASKRUN and IORING_SETUP_TASKRUN_FLAG.
+#[cfg(target_os = "linux")]
+const MIN_KERNEL_MAJOR: u32 = 5;
+#[cfg(target_os = "linux")]
+const MIN_KERNEL_MINOR: u32 = 19;
+
+/// Minimum kernel version for kernel.io_uring_disabled sysctl.
+///
+/// The sysctl was added in Linux 6.6. On older kernels the file is absent from
+/// /proc/sys even when io_uring is fully available, so its absence must not be
+/// read as "io_uring is not compiled in" below this version.
+#[cfg(target_os = "linux")]
+const SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR: u32 = 6;
+#[cfg(target_os = "linux")]
+const SYSCTL_IO_URING_DISABLED_KERNEL_MINOR: u32 = 6;
+
+/// Prints diagnostic information when io_uring setup fails with EINVAL.
+///
+/// This typically occurs when the kernel does not support the io_uring flags
+/// required by shard executors (IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG).
+/// The caller is responsible for deduplication (e.g., via `std::sync::Once`).
+///
+/// Everything is written to stderr. The function probes, in order, the kernel
+/// release reported by uname(2), WSL2 markers, the kernel.io_uring_disabled
+/// sysctl and the current AppArmor profile, then lists the issues it detected
+/// followed by generic remediation steps. Every probe is best-effort: a probe
+/// that fails is reported or skipped, never propagated to the caller.
+#[cfg(target_os = "linux")]
+pub fn print_invalid_io_uring_args_info() {
+    use nix::sys::utsname::uname;
+    use std::fs;
+    use std::io::ErrorKind;
+
+    eprintln!();
+    eprintln!("=== io_uring Invalid Argument (EINVAL) ===");
+    eprintln!();
+    eprintln!("The shard executor failed to initialize because the kernel rejected");
+    eprintln!("io_uring setup flags required for shard operation.");
+    eprintln!();
+    eprintln!(" The main thread's io_uring runtime uses default settings and initialized");
+    eprintln!(" successfully. Shard executors require additional flags:");
+    eprintln!(" - IORING_SETUP_COOP_TASKRUN (cooperative task running)");
+    eprintln!(" - IORING_SETUP_TASKRUN_FLAG (task runner flag notification)");
+    eprintln!(
+        " These flags require Linux kernel >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR} with full io_uring support."
+    );
+    eprintln!();
+
+    let mut detected_issues: Vec<String> = Vec::new();
+
+    // 1. Kernel version check
+    let release: Option<String> = match uname() {
+        Ok(info) => Some(info.release().to_string_lossy().into_owned()),
+        Err(_) => {
+            eprintln!(" [!] Could not retrieve kernel information via uname(2).");
+            None
+        }
+    };
+
+    // Stays `None` when uname(2) failed or the release string is unparsable;
+    // the sysctl probe below then cannot draw conclusions from a missing file.
+    let mut kernel_version: Option<(u32, u32)> = None;
+
+    if let Some(release) = release.as_deref() {
+        eprintln!(" Kernel release: {release}");
+
+        match parse_kernel_version(release) {
+            Some((major, minor)) => {
+                kernel_version = Some((major, minor));
+                if (major, minor) < (MIN_KERNEL_MAJOR, MIN_KERNEL_MINOR) {
+                    detected_issues.push(format!(
+                        "Kernel {major}.{minor} is too old (need >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR})"
+                    ));
+                }
+            }
+            None => {
+                eprintln!(" [!] Could not parse kernel version from release string.");
+            }
+        }
+    }
+
+    // 2. WSL2 detection. Runs even when uname(2) failed: /proc/version is an
+    //    independent source for the same marker.
+    let mentions_microsoft =
+        |text: &str| text.contains("microsoft") || text.contains("Microsoft");
+    let release_is_wsl = release.as_deref().is_some_and(mentions_microsoft);
+    let proc_version_is_wsl = fs::read_to_string("/proc/version")
+        .map(|version| mentions_microsoft(&version))
+        .unwrap_or(false);
+
+    if release_is_wsl || proc_version_is_wsl {
+        eprintln!(" Environment: WSL2 (Microsoft kernel fork detected)");
+        detected_issues.push(
+            "WSL2 kernel may not support IORING_SETUP_COOP_TASKRUN even if version >= 5.19"
+                .to_string(),
+        );
+    }
+
+    // 3. kernel.io_uring_disabled sysctl (only present from the kernel release
+    //    given by SYSCTL_IO_URING_DISABLED_KERNEL_{MAJOR,MINOR} onwards)
+    match fs::read_to_string("/proc/sys/kernel/io_uring_disabled") {
+        Ok(value) => {
+            let value = value.trim();
+            eprintln!(" kernel.io_uring_disabled = {value}");
+            match value {
+                "1" => detected_issues
+                    .push("io_uring is disabled for unprivileged users (sysctl = 1)".to_string()),
+                "2" => detected_issues
+                    .push("io_uring is fully disabled by sysctl (sysctl = 2)".to_string()),
+                _ => {}
+            }
+        }
+        Err(err) if err.kind() == ErrorKind::NotFound => {
+            // A missing file is only meaningful on kernels new enough to ship
+            // the sysctl: there it suggests io_uring is not compiled in
+            // (CONFIG_IO_URING=n).
+            if let Some((major, minor)) = kernel_version
+                && (major, minor)
+                    >= (
+                        SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR,
+                        SYSCTL_IO_URING_DISABLED_KERNEL_MINOR,
+                    )
+            {
+                detected_issues.push(format!(
+                    "kernel.io_uring_disabled sysctl not found on kernel >= \
+                     {SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR}.{SYSCTL_IO_URING_DISABLED_KERNEL_MINOR} \
+                     - io_uring may not be compiled in (CONFIG_IO_URING=n)"
+                ));
+            }
+        }
+        Err(err) => {
+            // The file exists but could not be read (e.g. permissions), which
+            // says nothing about whether io_uring is compiled in.
+            eprintln!(" [!] Could not read kernel.io_uring_disabled: {err}");
+        }
+    }
+
+    // 4. AppArmor - informational only, not added to detected_issues
+    let apparmor_profile = fs::read_to_string("/proc/self/attr/apparmor/current")
+        .ok()
+        .map(|s| s.trim().to_string());
+
+    if let Some(ref profile) = apparmor_profile
+        && profile != "unconfined"
+        && !profile.is_empty()
+    {
+        eprintln!(" AppArmor profile: {profile}");
+    }
+
+    // Print detected issues
+    if detected_issues.is_empty() {
+        eprintln!();
+        eprintln!(" No specific issue was detected. The kernel may lack io_uring support");
+        eprintln!(" for the flags used by Iggy's shard executors.");
+    } else {
+        eprintln!();
+        eprintln!(" Detected issues:");
+        for (i, issue) in detected_issues.iter().enumerate() {
+            eprintln!(" {}. {issue}", i + 1);
+        }
+    }
+
+    eprintln!();
+    eprintln!(" To resolve this:");
+    eprintln!();
+    eprintln!(
+        " 1. Upgrade to Linux kernel >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR} (>= {SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR}.{SYSCTL_IO_URING_DISABLED_KERNEL_MINOR} recommended)"
+    );
+    eprintln!();
+    eprintln!(" 2. If running under WSL2:");
+    eprintln!(" - Update WSL: wsl --update (from PowerShell)");
+    eprintln!(" - Or build a custom kernel with full io_uring support:");
+    eprintln!(" https://learn.microsoft.com/en-us/windows/wsl/wsl-config#wsl-2-settings");
+    eprintln!(" - Or use Docker Desktop / a native Linux VM instead of WSL2");
+    eprintln!();
+    eprintln!(" 3. If io_uring is disabled via sysctl:");
+    eprintln!(" sudo sysctl -w kernel.io_uring_disabled=0");
+    eprintln!();
+    eprintln!(" 4. If AppArmor is restricting io_uring:");
+    eprintln!(" sudo aa-complain <profile-name>");
+    eprintln!();
+    eprintln!(" 5. Check kernel logs for more details:");
+    eprintln!(" dmesg | grep -i io_uring");
+    eprintln!();
+    print_discord_link();
+}
+
+/// Extracts the leading `(major, minor)` pair from a kernel release string.
+///
+/// The string is cut at every character that is not an ASCII digit, and the
+/// first two non-empty digit runs are read as major and minor, so
+/// `"6.8.0-45-generic"` yields `Some((6, 8))`. Returns `None` when fewer than
+/// two digit runs are present or when either run does not fit in a `u32`.
+#[cfg(target_os = "linux")]
+fn parse_kernel_version(release: &str) -> Option<(u32, u32)> {
+    let mut numbers = release
+        .split(|ch: char| !ch.is_ascii_digit())
+        .filter(|run| !run.is_empty())
+        .map(str::parse::<u32>);
+
+    match (numbers.next(), numbers.next()) {
+        (Some(Ok(major)), Some(Ok(minor))) => Some((major, minor)),
+        _ => None,
+    }
}
#[cfg(not(target_os = "linux"))]
@@ -108,3 +292,45 @@ pub fn print_locked_memory_limit_info() {}
#[cfg(not(target_os = "linux"))]
pub fn print_io_uring_permission_info() {}
+
+/// No-op variant of `print_invalid_io_uring_args_info` compiled on non-Linux
+/// targets, so callers can invoke it without their own `cfg` guards.
+#[cfg(not(target_os = "linux"))]
+pub fn print_invalid_io_uring_args_info() {}
+
+// Unit tests for `parse_kernel_version`. The parser only exists on Linux, so
+// the whole module is gated on the target OS instead of each item.
+#[cfg(all(test, target_os = "linux"))]
+mod tests {
+    use super::parse_kernel_version;
+
+    // Typical distribution release string with patch level and suffix.
+    #[test]
+    fn test_parse_standard_kernel_version() {
+        let parsed = parse_kernel_version("6.8.0-45-generic");
+        assert_eq!(parsed, Some((6, 8)));
+    }
+
+    // WSL2 release strings carry four numeric components and a vendor suffix.
+    #[test]
+    fn test_parse_wsl2_kernel_version() {
+        let parsed = parse_kernel_version("5.15.153.1-microsoft-standard-WSL2");
+        assert_eq!(parsed, Some((5, 15)));
+    }
+
+    // Bare "major.minor" with nothing after it.
+    #[test]
+    fn test_parse_minimal_version() {
+        let parsed = parse_kernel_version("5.19");
+        assert_eq!(parsed, Some((5, 19)));
+    }
+
+    // No digits at all.
+    #[test]
+    fn test_parse_garbage_returns_none() {
+        let parsed = parse_kernel_version("not-a-version");
+        assert_eq!(parsed, None);
+    }
+
+    // Empty input.
+    #[test]
+    fn test_parse_empty_returns_none() {
+        let parsed = parse_kernel_version("");
+        assert_eq!(parsed, None);
+    }
+}
diff --git a/core/server/src/main.rs b/core/server/src/main.rs
index 1a13ba31f..49c5496c7 100644
--- a/core/server/src/main.rs
+++ b/core/server/src/main.rs
@@ -32,7 +32,10 @@ use server::bootstrap::{
create_directories, create_shard_connections, create_shard_executor,
load_config,
load_metadata, resolve_persister, update_system_info,
};
-use server::diagnostics::{print_io_uring_permission_info,
print_locked_memory_limit_info};
+use server::diagnostics::{
+ print_invalid_io_uring_args_info, print_io_uring_permission_info,
+ print_locked_memory_limit_info,
+};
use server::io::fs_utils;
use server::log::logger::Logging;
use server::metadata::{Metadata, create_metadata_handles};
@@ -60,6 +63,7 @@ const SHARDS_TABLE_CAPACITY: usize = 16384;
static SHUTDOWN_START_TIME: AtomicU64 = AtomicU64::new(0);
static SHUTDOWN_INITIATED: AtomicBool = AtomicBool::new(false);
+static SHARD_EXECUTOR_DIAGNOSTIC: std::sync::Once = std::sync::Once::new();
enum ShardExitStatus {
Success,
@@ -395,7 +399,27 @@ fn main() -> Result<(), ServerError> {
error!("Failed to bind memory: {e:?}");
}
- let rt = create_shard_executor();
+ let rt = match create_shard_executor() {
+ Ok(rt) => rt,
+ Err(e) => {
+ match e.kind() {
+ std::io::ErrorKind::InvalidInput => {
+ SHARD_EXECUTOR_DIAGNOSTIC
+
.call_once(print_invalid_io_uring_args_info);
+ }
+ std::io::ErrorKind::OutOfMemory => {
+ SHARD_EXECUTOR_DIAGNOSTIC
+
.call_once(print_locked_memory_limit_info);
+ }
+ std::io::ErrorKind::PermissionDenied => {
+ SHARD_EXECUTOR_DIAGNOSTIC
+
.call_once(print_io_uring_permission_info);
+ }
+ _ => {}
+ }
+ panic!("Cannot create shard-{id} executor:
{e}");
+ }
+ };
rt.block_on(async move {
let mut builder = IggyShard::builder();
builder = builder