This is an automated email from the ASF dual-hosted git repository.

hgruszecki pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iggy.git


The following commit(s) were added to refs/heads/master by this push:
     new 484f98a59 feat(server): add io_uring EINVAL diagnostics for shard executors (#3021)
484f98a59 is described below

commit 484f98a5989764ffc28a5e863d753f6b6e9b2954
Author: Hubert Gruszecki <[email protected]>
AuthorDate: Tue Mar 24 12:56:02 2026 +0100

    feat(server): add io_uring EINVAL diagnostics for shard executors (#3021)
---
 Cargo.toml                     |   2 +-
 core/server/src/bootstrap.rs   |   6 +-
 core/server/src/diagnostics.rs | 226 +++++++++++++++++++++++++++++++++++++++++
 core/server/src/main.rs        |  28 ++++-
 4 files changed, 257 insertions(+), 5 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index bdd7d84a1..6ac198684 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -184,7 +184,7 @@ mime_guess = "2.0"
 mockall = "0.14.0"
 moka = { version = "0.12.14", features = ["future"] }
 mongodb = { version = "3.5.2", features = ["rustls-tls"] }
-nix = { version = "0.31.2", features = ["fs", "resource", "sched"] }
+nix = { version = "0.31.2", features = ["feature", "fs", "resource", "sched"] }
 nonzero_lit = "0.1.2"
 notify = "8.2.0"
 octocrab = "0.49.5"
diff --git a/core/server/src/bootstrap.rs b/core/server/src/bootstrap.rs
index a874fd068..3a5661bce 100644
--- a/core/server/src/bootstrap.rs
+++ b/core/server/src/bootstrap.rs
@@ -165,7 +165,10 @@ pub fn create_root_user() -> User {
     User::root(&username, &password)
 }
 
-pub fn create_shard_executor() -> Runtime {
+// Shard executors require IORING_SETUP_COOP_TASKRUN for predictable latency.
+// Falling back to default flags would silently degrade shard performance -
+// do not add a retry with reduced flags here.
+pub fn create_shard_executor() -> Result<Runtime, std::io::Error> {
     // TODO: The event interval tick, could be configured based on the fact
     // How many clients we expect to have connected.
     // This roughly estimates the number of tasks we will create.
@@ -186,7 +189,6 @@ pub fn create_shard_executor() -> Runtime {
         .with_proactor(proactor.to_owned())
         .event_interval(128)
         .build()
-        .unwrap()
 }
 
 pub fn resolve_persister(enforce_fsync: bool) -> Arc<PersisterKind> {
diff --git a/core/server/src/diagnostics.rs b/core/server/src/diagnostics.rs
index 6029cbf88..32a19ea3b 100644
--- a/core/server/src/diagnostics.rs
+++ b/core/server/src/diagnostics.rs
@@ -16,6 +16,15 @@
  * under the License.
  */
 
+#[cfg(target_os = "linux")]
+const DISCORD_SUPPORT_URL: &str = "https://discord.gg/apache-iggy";
+
+#[cfg(target_os = "linux")]
+fn print_discord_link() {
+    eprintln!("  Need help? Join our Discord: {DISCORD_SUPPORT_URL}");
+    eprintln!();
+}
+
 /// Prints information about locked memory limits when runtime creation fails.
 /// This is typically needed when io_uring cannot allocate memory due to RLIMIT_MEMLOCK.
 #[cfg(target_os = "linux")]
@@ -71,6 +80,7 @@ pub fn print_locked_memory_limit_info() {
     eprintln!("  5. For systemd services (add to service file):");
     eprintln!("     LimitMEMLOCK=infinity");
     eprintln!();
+    print_discord_link();
 }
 
 /// Prints information about io_uring permission issues in containerized environments.
@@ -101,6 +111,180 @@ pub fn print_io_uring_permission_info() {
     eprintln!("       seccompProfile:");
     eprintln!("         type: Unconfined");
     eprintln!();
+    print_discord_link();
+}
+
+/// Minimum kernel version for IORING_SETUP_COOP_TASKRUN and IORING_SETUP_TASKRUN_FLAG.
+#[cfg(target_os = "linux")]
+const MIN_KERNEL_MAJOR: u32 = 5;
+#[cfg(target_os = "linux")]
+const MIN_KERNEL_MINOR: u32 = 19;
+
+/// Minimum kernel version for kernel.io_uring_disabled sysctl.
+#[cfg(target_os = "linux")]
+const SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR: u32 = 6;
+#[cfg(target_os = "linux")]
+const SYSCTL_IO_URING_DISABLED_KERNEL_MINOR: u32 = 1;
+
+/// Prints diagnostic information when io_uring setup fails with EINVAL.
+///
+/// This typically occurs when the kernel does not support the io_uring flags
+/// required by shard executors (IORING_SETUP_COOP_TASKRUN, IORING_SETUP_TASKRUN_FLAG).
+/// The caller is responsible for deduplication (e.g., via `std::sync::Once`).
+#[cfg(target_os = "linux")]
+pub fn print_invalid_io_uring_args_info() {
+    use nix::sys::utsname::uname;
+    use std::fs;
+
+    eprintln!();
+    eprintln!("=== io_uring Invalid Argument (EINVAL) ===");
+    eprintln!();
+    eprintln!("The shard executor failed to initialize because the kernel rejected");
+    eprintln!("io_uring setup flags required for shard operation.");
+    eprintln!();
+    eprintln!("  The main thread's io_uring runtime uses default settings and initialized");
+    eprintln!("  successfully. Shard executors require additional flags:");
+    eprintln!("    - IORING_SETUP_COOP_TASKRUN (cooperative task running)");
+    eprintln!("    - IORING_SETUP_TASKRUN_FLAG (task runner flag notification)");
+    eprintln!(
+        "  These flags require Linux kernel >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR} with full io_uring support."
+    );
+    eprintln!();
+
+    let mut detected_issues: Vec<String> = Vec::new();
+
+    // 1. Kernel version check
+    let uname_info = match uname() {
+        Ok(info) => Some(info),
+        Err(_) => {
+            eprintln!("  [!] Could not retrieve kernel information via uname(2).");
+            None
+        }
+    };
+
+    let mut kernel_version: Option<(u32, u32)> = None;
+
+    if let Some(ref info) = uname_info {
+        let release = info.release().to_string_lossy();
+        eprintln!("  Kernel release: {release}");
+
+        if let Some((major, minor)) = parse_kernel_version(&release) {
+            kernel_version = Some((major, minor));
+            if (major, minor) < (MIN_KERNEL_MAJOR, MIN_KERNEL_MINOR) {
+                detected_issues.push(format!(
+                    "Kernel {major}.{minor} is too old (need >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR})"
+                ));
+            }
+        } else {
+            eprintln!("  [!] Could not parse kernel version from release string.");
+        }
+
+        // 2. WSL2 detection
+        let release_is_wsl = release.contains("microsoft") || release.contains("Microsoft");
+        let proc_version_is_wsl = fs::read_to_string("/proc/version")
+            .map(|v| v.contains("Microsoft") || v.contains("microsoft"))
+            .unwrap_or(false);
+
+        if release_is_wsl || proc_version_is_wsl {
+            eprintln!("  Environment: WSL2 (Microsoft kernel fork detected)");
+            detected_issues.push(
+                "WSL2 kernel may not support IORING_SETUP_COOP_TASKRUN even if version >= 5.19"
+                    .to_string(),
+            );
+        }
+    }
+
+    // 3. kernel.io_uring_disabled sysctl (available since kernel 6.1)
+    match fs::read_to_string("/proc/sys/kernel/io_uring_disabled") {
+        Ok(value) => {
+            let value = value.trim();
+            eprintln!("  kernel.io_uring_disabled = {value}");
+            match value {
+                "1" => detected_issues
+                    .push("io_uring is disabled for unprivileged users (sysctl = 1)".to_string()),
+                "2" => detected_issues
+                    .push("io_uring is fully disabled by sysctl (sysctl = 2)".to_string()),
+                _ => {}
+            }
+        }
+        Err(_) => {
+            // The sysctl was introduced in kernel 6.1. If the file is absent on a kernel >= 6.1,
+            // io_uring is likely not compiled in (CONFIG_IO_URING=n).
+            if let Some((major, minor)) = kernel_version
+                && (major, minor)
+                    >= (
+                        SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR,
+                        SYSCTL_IO_URING_DISABLED_KERNEL_MINOR,
+                    )
+            {
+                detected_issues.push(format!(
+                    "kernel.io_uring_disabled sysctl not found on kernel >= \
+                     {SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR}.{SYSCTL_IO_URING_DISABLED_KERNEL_MINOR} \
+                     - io_uring may not be compiled in (CONFIG_IO_URING=n)"
+                ));
+            }
+        }
+    }
+
+    // 4. AppArmor - informational only, not added to detected_issues
+    let apparmor_profile = fs::read_to_string("/proc/self/attr/apparmor/current")
+        .ok()
+        .map(|s| s.trim().to_string());
+
+    if let Some(ref profile) = apparmor_profile
+        && profile != "unconfined"
+        && !profile.is_empty()
+    {
+        eprintln!("  AppArmor profile: {profile}");
+    }
+
+    // Print detected issues
+    if detected_issues.is_empty() {
+        eprintln!();
+        eprintln!("  No specific issue was detected. The kernel may lack io_uring support");
+        eprintln!("  for the flags used by Iggy's shard executors.");
+    } else {
+        eprintln!();
+        eprintln!("  Detected issues:");
+        for (i, issue) in detected_issues.iter().enumerate() {
+            eprintln!("    {}. {issue}", i + 1);
+        }
+    }
+
+    eprintln!();
+    eprintln!("  To resolve this:");
+    eprintln!();
+    eprintln!(
+        "  1. Upgrade to Linux kernel >= {MIN_KERNEL_MAJOR}.{MIN_KERNEL_MINOR} (>= {SYSCTL_IO_URING_DISABLED_KERNEL_MAJOR}.{SYSCTL_IO_URING_DISABLED_KERNEL_MINOR} recommended)"
+    );
+    eprintln!();
+    eprintln!("  2. If running under WSL2:");
+    eprintln!("     - Update WSL: wsl --update  (from PowerShell)");
+    eprintln!("     - Or build a custom kernel with full io_uring support:");
+    eprintln!("       https://learn.microsoft.com/en-us/windows/wsl/wsl-config#wsl-2-settings");
+    eprintln!("     - Or use Docker Desktop / a native Linux VM instead of WSL2");
+    eprintln!();
+    eprintln!("  3. If io_uring is disabled via sysctl:");
+    eprintln!("     sudo sysctl -w kernel.io_uring_disabled=0");
+    eprintln!();
+    eprintln!("  4. If AppArmor is restricting io_uring:");
+    eprintln!("     sudo aa-complain <profile-name>");
+    eprintln!();
+    eprintln!("  5. Check kernel logs for more details:");
+    eprintln!("     dmesg | grep -i io_uring");
+    eprintln!();
+    print_discord_link();
+}
+
+/// Parses "major.minor[.patch...][-suffix]" from a kernel release string.
+#[cfg(target_os = "linux")]
+fn parse_kernel_version(release: &str) -> Option<(u32, u32)> {
+    let mut parts = release
+        .split(|c: char| !c.is_ascii_digit())
+        .filter(|s| !s.is_empty());
+    let major = parts.next()?.parse::<u32>().ok()?;
+    let minor = parts.next()?.parse::<u32>().ok()?;
+    Some((major, minor))
 }
 
 #[cfg(not(target_os = "linux"))]
@@ -108,3 +292,45 @@ pub fn print_locked_memory_limit_info() {}
 
 #[cfg(not(target_os = "linux"))]
 pub fn print_io_uring_permission_info() {}
+
+#[cfg(not(target_os = "linux"))]
+pub fn print_invalid_io_uring_args_info() {}
+
+#[cfg(test)]
+mod tests {
+    #[cfg(target_os = "linux")]
+    use super::parse_kernel_version;
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_parse_standard_kernel_version() {
+        assert_eq!(parse_kernel_version("6.8.0-45-generic"), Some((6, 8)));
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_parse_wsl2_kernel_version() {
+        assert_eq!(
+            parse_kernel_version("5.15.153.1-microsoft-standard-WSL2"),
+            Some((5, 15))
+        );
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_parse_minimal_version() {
+        assert_eq!(parse_kernel_version("5.19"), Some((5, 19)));
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_parse_garbage_returns_none() {
+        assert_eq!(parse_kernel_version("not-a-version"), None);
+    }
+
+    #[test]
+    #[cfg(target_os = "linux")]
+    fn test_parse_empty_returns_none() {
+        assert_eq!(parse_kernel_version(""), None);
+    }
+}
diff --git a/core/server/src/main.rs b/core/server/src/main.rs
index 1a13ba31f..49c5496c7 100644
--- a/core/server/src/main.rs
+++ b/core/server/src/main.rs
@@ -32,7 +32,10 @@ use server::bootstrap::{
     create_directories, create_shard_connections, create_shard_executor, load_config,
     load_metadata, resolve_persister, update_system_info,
 };
-use server::diagnostics::{print_io_uring_permission_info, print_locked_memory_limit_info};
+use server::diagnostics::{
+    print_invalid_io_uring_args_info, print_io_uring_permission_info,
+    print_locked_memory_limit_info,
+};
 use server::io::fs_utils;
 use server::log::logger::Logging;
 use server::metadata::{Metadata, create_metadata_handles};
@@ -60,6 +63,7 @@ const SHARDS_TABLE_CAPACITY: usize = 16384;
 
 static SHUTDOWN_START_TIME: AtomicU64 = AtomicU64::new(0);
 static SHUTDOWN_INITIATED: AtomicBool = AtomicBool::new(false);
+static SHARD_EXECUTOR_DIAGNOSTIC: std::sync::Once = std::sync::Once::new();
 
 enum ShardExitStatus {
     Success,
@@ -395,7 +399,27 @@ fn main() -> Result<(), ServerError> {
                             error!("Failed to bind memory: {e:?}");
                         }
 
-                        let rt = create_shard_executor();
+                        let rt = match create_shard_executor() {
+                            Ok(rt) => rt,
+                            Err(e) => {
+                                match e.kind() {
+                                    std::io::ErrorKind::InvalidInput => {
+                                        SHARD_EXECUTOR_DIAGNOSTIC
+                                            .call_once(print_invalid_io_uring_args_info);
+                                    }
+                                    std::io::ErrorKind::OutOfMemory => {
+                                        SHARD_EXECUTOR_DIAGNOSTIC
+                                            .call_once(print_locked_memory_limit_info);
+                                    }
+                                    std::io::ErrorKind::PermissionDenied => {
+                                        SHARD_EXECUTOR_DIAGNOSTIC
+                                            .call_once(print_io_uring_permission_info);
+                                    }
+                                    _ => {}
+                                }
+                                panic!("Cannot create shard-{id} executor: {e}");
+                            }
+                        };
                         rt.block_on(async move {
                             let mut builder = IggyShard::builder();
                             builder = builder

Reply via email to