This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new fc49cf3 GH-301: [C#] Use an index lookup for O(1) field index access
(#300)
fc49cf3 is described below
commit fc49cf334a5507829ac46e62de6c93022a32ca22
Author: Vasilis Themelis <[email protected]>
AuthorDate: Tue Mar 31 01:49:21 2026 +0100
GH-301: [C#] Use an index lookup for O(1) field index access (#300)
Closes #301.
Ports the optimization from the closed PR at
https://github.com/apache/arrow/pull/44633 into the new .NET-specific
repository.
The original PR was closed on November 18, 2025 with the note that the
C# implementation had moved to a new repository.
This version keeps the current `arrow-dotnet` behavior intact:
- `GetFieldIndex` called without a comparer (or with `comparer: null`) now
uses a `CurrentCulture` index lookup that is built once when the schema is
constructed; this covers the common case.
- Missing fields still return `-1`.
- Duplicate field names still return the first match.
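- Any comparer that is neither `null` nor the same instance as
`StringComparer.CurrentCulture` (for example `Ordinal` or
`OrdinalIgnoreCase`) still falls back to the existing linear scan.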
I also added dedicated schema tests covering:
- `null`, `Ordinal`, `OrdinalIgnoreCase`, and `CurrentCulture` comparers
- duplicate-name lookup returning the first match
- missing-name behavior for each comparer
Local verification:
- `dotnet build test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj`
- `DOTNET_ROLL_FORWARD=Major DOTNET_ROLL_FORWARD_TO_PRERELEASE=1 dotnet
test test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj --no-restore
--logger 'console;verbosity=minimal'`
---
src/Apache.Arrow/Schema.cs | 23 +++++-----
test/Apache.Arrow.Tests/SchemaBuilderTests.cs | 11 +++++
test/Apache.Arrow.Tests/SchemaTests.cs | 65 +++++++++++++++++++++++++++
3 files changed, 88 insertions(+), 11 deletions(-)
diff --git a/src/Apache.Arrow/Schema.cs b/src/Apache.Arrow/Schema.cs
index 04e0e83..528adb9 100644
--- a/src/Apache.Arrow/Schema.cs
+++ b/src/Apache.Arrow/Schema.cs
@@ -31,6 +31,7 @@ namespace Apache.Arrow
private readonly List<Field> _fieldsList;
public ILookup<string, Field> FieldsLookup { get; }
+ private readonly ILookup<string, int> _fieldsIndexLookup;
public IReadOnlyDictionary<string, string> Metadata { get; }
@@ -43,17 +44,11 @@ namespace Apache.Arrow
public Schema(
IEnumerable<Field> fields,
IEnumerable<KeyValuePair<string, string>> metadata)
+ : this(
+ fields?.ToList() ?? throw new
ArgumentNullException(nameof(fields)),
+ metadata?.ToDictionary(kv => kv.Key, kv => kv.Value),
+ false)
{
- if (fields is null)
- {
- throw new ArgumentNullException(nameof(fields));
- }
-
- _fieldsList = fields.ToList();
- FieldsLookup = _fieldsList.ToLookup(f => f.Name);
- _fieldsDictionary = FieldsLookup.ToDictionary(g => g.Key, g =>
g.First());
-
- Metadata = metadata?.ToDictionary(kv => kv.Key, kv => kv.Value);
}
internal Schema(List<Field> fieldsList, IReadOnlyDictionary<string,
string> metadata, bool copyCollections)
@@ -64,6 +59,9 @@ namespace Apache.Arrow
_fieldsList = fieldsList;
FieldsLookup = _fieldsList.ToLookup(f => f.Name);
_fieldsDictionary = FieldsLookup.ToDictionary(g => g.Key, g =>
g.First());
+ _fieldsIndexLookup = _fieldsList
+ .Select((field, index) => (field.Name, index))
+ .ToLookup(item => item.Name, item => item.index,
StringComparer.CurrentCulture);
Metadata = metadata;
}
@@ -80,7 +78,10 @@ namespace Apache.Arrow
public int GetFieldIndex(string name, IEqualityComparer<string>
comparer = default)
{
- comparer ??= StringComparer.CurrentCulture;
+ if (comparer == null || ReferenceEquals(comparer,
StringComparer.CurrentCulture))
+ {
+ return _fieldsIndexLookup[name].DefaultIfEmpty(-1).First();
+ }
for (int i = 0; i < _fieldsList.Count; i++)
{
diff --git a/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
b/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
index 2691e20..e8cb1fb 100644
--- a/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
+++ b/test/Apache.Arrow.Tests/SchemaBuilderTests.cs
@@ -117,6 +117,17 @@ namespace Apache.Arrow.Tests
Assert.Equal(2, schema.GetFieldIndex("f1"));
}
+ [Fact]
+ public void
GetFieldIndexWithComparerReturnsMinusOneWhenFieldDoesNotExist()
+ {
+ var schema = new Schema.Builder()
+ .Field(f => f.Name("f0").DataType(Int32Type.Default))
+ .Build();
+
+ Assert.Equal(-1, schema.GetFieldIndex("F0",
StringComparer.Ordinal));
+ Assert.Equal(-1, schema.GetFieldIndex("f1",
StringComparer.OrdinalIgnoreCase));
+ }
+
[Fact]
public void GetFieldByName()
{
diff --git a/test/Apache.Arrow.Tests/SchemaTests.cs
b/test/Apache.Arrow.Tests/SchemaTests.cs
new file mode 100644
index 0000000..f18cc7c
--- /dev/null
+++ b/test/Apache.Arrow.Tests/SchemaTests.cs
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests;
+
+public class SchemaTests
+{
+ [Fact]
+ public void ThrowsWhenFieldsAreNull()
+ {
+ Assert.Throws<ArgumentNullException>(() => new Schema(null, null));
+ }
+
+ [Theory]
+ [MemberData(nameof(StringComparers))]
+ public void CanRetrieveFieldIndexByName(StringComparer comparer)
+ {
+ var field0 = new Field("f0", Int32Type.Default, true);
+ var field1 = new Field("f1", Int64Type.Default, true);
+ var schema = new Schema(new[] { field0, field1 }, null);
+
+ Assert.Equal(0, schema.GetFieldIndex("f0", comparer));
+ Assert.Equal(1, schema.GetFieldIndex("f1", comparer));
+ Assert.Equal(-1, schema.GetFieldIndex("nonexistent", comparer));
+ }
+
+ [Theory]
+ [MemberData(nameof(StringComparers))]
+ public void CanRetrieveFieldIndexByNonUniqueName(StringComparer comparer)
+ {
+ var field0 = new Field("f0", Int32Type.Default, true);
+ var field1 = new Field("f1", Int64Type.Default, true);
+
+ var schema = new Schema(new[] { field0, field1, field0, field1 },
null);
+
+ Assert.Equal(0, schema.GetFieldIndex("f0", comparer));
+ Assert.Equal(1, schema.GetFieldIndex("f1", comparer));
+ Assert.Equal(-1, schema.GetFieldIndex("nonexistent", comparer));
+ }
+
+ public static IEnumerable<object[]> StringComparers()
+ {
+ yield return new object[] { null };
+ yield return new object[] { StringComparer.Ordinal };
+ yield return new object[] { StringComparer.OrdinalIgnoreCase };
+ yield return new object[] { StringComparer.CurrentCulture };
+ }
+}