This is an automated email from the ASF dual-hosted git repository. curth pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push: new 07243a4f2 feat(csharp/src/Drivers/BigQuery): use a default project ID if one is not specified (#2471) 07243a4f2 is described below commit 07243a4f228a3107a70cb4c760374714cc5b2062 Author: davidhcoe <13318837+davidh...@users.noreply.github.com> AuthorDate: Tue Jan 21 08:50:32 2025 -0500 feat(csharp/src/Drivers/BigQuery): use a default project ID if one is not specified (#2471) - No longer requires a project ID to be specified in the parameters - If not specified, will use a default project ID (same as the Go driver) - Adds parameter to include the public project ID if desired --------- Co-authored-by: David Coe <> --- csharp/src/Drivers/BigQuery/BigQueryConnection.cs | 29 +++++++++++++++---- csharp/src/Drivers/BigQuery/BigQueryParameters.cs | 4 +++ csharp/src/Drivers/BigQuery/readme.md | 5 +++- .../Drivers/BigQuery/BigQueryTestConfiguration.cs | 5 +++- .../test/Drivers/BigQuery/BigQueryTestingUtils.cs | 10 +++++-- csharp/test/Drivers/BigQuery/DriverTests.cs | 33 ++++++++++++++++++++-- 6 files changed, 73 insertions(+), 13 deletions(-) diff --git a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs index 191526399..a0c70c63f 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryConnection.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryConnection.cs @@ -41,11 +41,13 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery readonly IReadOnlyDictionary<string, string> properties; BigQueryClient? client; GoogleCredential? credential; + bool includePublicProjectIds = false; const string infoDriverName = "ADBC BigQuery Driver"; const string infoDriverVersion = "1.0.0"; const string infoVendorName = "BigQuery"; const string infoDriverArrowVersion = "1.0.0"; + const string publicProjectId = "bigquery-public-data"; readonly AdbcInfoCode[] infoSupportedCodes = new[] { AdbcInfoCode.DriverName, @@ -81,8 +83,15 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery // TODO: handle token expiration + // if the caller doesn't specify a projectId, use the default if (!this.properties.TryGetValue(BigQueryParameters.ProjectId, out projectId)) - throw new ArgumentException($"The {BigQueryParameters.ProjectId} parameter is not present"); + projectId = BigQueryConstants.DetectProjectId; + + if (this.properties.TryGetValue(BigQueryParameters.IncludePublicProjectId, out string? result)) + { + if (!string.IsNullOrEmpty(result)) + includePublicProjectIds = Convert.ToBoolean(result); + } if (this.properties.TryGetValue(BigQueryParameters.AuthenticationType, out string? newAuthenticationType)) { @@ -308,11 +317,18 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery if (catalogs != null) { - foreach (CloudProject catalog in catalogs) + List<string> projectIds = catalogs.Select(x => x.ProjectId).ToList(); + + if (this.includePublicProjectIds && !projectIds.Contains(publicProjectId)) + projectIds.Add(publicProjectId); + + projectIds.Sort(); + + foreach (string projectId in projectIds) { - if (Regex.IsMatch(catalog.ProjectId, catalogRegexp, RegexOptions.IgnoreCase)) + if (Regex.IsMatch(projectId, catalogRegexp, RegexOptions.IgnoreCase)) { - catalogNameBuilder.Append(catalog.ProjectId); + catalogNameBuilder.Append(projectId); if (depth == GetObjectsDepth.Catalogs) { @@ -321,7 +337,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery else { catalogDbSchemasValues.Add(GetDbSchemas( - depth, catalog.ProjectId, dbSchemaPattern, + depth, projectId, dbSchemaPattern, tableNamePattern, tableTypes, columnNamePattern)); } } @@ -333,6 +349,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery catalogNameBuilder.Build(), catalogDbSchemasValues.BuildListArrayForType(new StructType(StandardSchemas.DbSchemaSchema)), }; + StandardSchemas.GetObjectsSchema.Validate(dataArrays); return dataArrays; @@ -994,7 +1011,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery { if (this.credential == null) { - throw new InvalidOperationException(); + throw new AdbcException("A credential must be set", AdbcStatusCode.Unauthenticated); } if (this.client == null) diff --git a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs index 51272eb64..101a2dafa 100644 --- a/csharp/src/Drivers/BigQuery/BigQueryParameters.cs +++ b/csharp/src/Drivers/BigQuery/BigQueryParameters.cs @@ -36,6 +36,7 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery public const string IncludeConstraintsWithGetObjects = "adbc.bigquery.include_constraints_getobjects"; public const string GetQueryResultsOptionsTimeoutMinutes = "adbc.bigquery.get_query_results_options.timeout"; public const string MaxFetchConcurrency = "adbc.bigquery.max_fetch_concurrency"; + public const string IncludePublicProjectId = "adbc.bigquery.include_public_project_id"; } /// <summary> @@ -47,5 +48,8 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery public const string ServiceAccountAuthenticationType = "service"; public const string TokenEndpoint = "https://accounts.google.com/o/oauth2/token"; public const string TreatLargeDecimalAsString = "true"; + + // default value per https://pkg.go.dev/cloud.google.com/go/bigquery#section-readme + public const string DetectProjectId = "*detect-project-id*"; } } diff --git a/csharp/src/Drivers/BigQuery/readme.md b/csharp/src/Drivers/BigQuery/readme.md index 92af3ef1e..f7adb3958 100644 --- a/csharp/src/Drivers/BigQuery/readme.md +++ b/csharp/src/Drivers/BigQuery/readme.md @@ -64,7 +64,10 @@ https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G Optional. Sets the [DestinationTable](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_DestinationTable) value of the QueryOptions if configured. Expects the format to be `{projectId}.{datasetId}.{tableId}` to set the corresponding values in the [TableReference](https://github.com/googleapis/google-api-dotnet-client/blob/6c415c73788b848711e47c6dd33c2f93c76faf9 [...] **adbc.bigquery.project_id**<br> - The [Project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects) used for accessing BigQuery. + The [Project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects) used for accessing BigQuery. If not specified, will default to detect the projectIds the credentials have access to. + +**adbc.bigquery.include_public_project_id**<br> + Include the `bigquery-public-data` project ID with the list of project IDs. **adbc.bigquery.refresh_token**<br> The refresh token used for when the generated OAuth token expires. Required for `user` authentication. diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs index ffcae7cc0..17c228ac9 100644 --- a/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs +++ b/csharp/test/Drivers/BigQuery/BigQueryTestConfiguration.cs @@ -31,7 +31,7 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery } [JsonPropertyName("projectId")] - public string ProjectId { get; set; } = string.Empty; + public string? ProjectId { get; set; } [JsonPropertyName("clientId")] public string ClientId { get; set; } = string.Empty; @@ -57,6 +57,9 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery [JsonPropertyName("includeTableConstraints")] public bool IncludeTableConstraints { get; set; } + [JsonPropertyName("includePublicProjectId")] + public bool IncludePublicProjectId { get; set; } = false; + [JsonPropertyName("timeoutMinutes")] public int? TimeoutMinutes { get; set; } diff --git a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs index 2bc6227ba..ff7e36139 100644 --- a/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs +++ b/csharp/test/Drivers/BigQuery/BigQueryTestingUtils.cs @@ -51,10 +51,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery /// <returns></returns> internal static Dictionary<string, string> GetBigQueryParameters(BigQueryTestConfiguration testConfiguration) { - Dictionary<string, string> parameters = new Dictionary<string, string> + Dictionary<string, string> parameters = new Dictionary<string, string>{}; + + if (!string.IsNullOrEmpty(testConfiguration.ProjectId)) { - { BigQueryParameters.ProjectId, testConfiguration.ProjectId }, - }; + parameters.Add(BigQueryParameters.ProjectId, testConfiguration.ProjectId!); + } if (!string.IsNullOrEmpty(testConfiguration.JsonCredential)) { @@ -81,6 +83,8 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery parameters.Add(BigQueryParameters.IncludeConstraintsWithGetObjects, testConfiguration.IncludeTableConstraints.ToString()); + parameters.Add(BigQueryParameters.IncludePublicProjectId, testConfiguration.IncludePublicProjectId.ToString()); + if (!string.IsNullOrEmpty(testConfiguration.LargeResultsDestinationTable)) { parameters.Add(BigQueryParameters.LargeResultsDestinationTable, testConfiguration.LargeResultsDestinationTable); diff --git a/csharp/test/Drivers/BigQuery/DriverTests.cs b/csharp/test/Drivers/BigQuery/DriverTests.cs index 579a2a83c..58db4db16 100644 --- a/csharp/test/Drivers/BigQuery/DriverTests.cs +++ b/csharp/test/Drivers/BigQuery/DriverTests.cs @@ -23,6 +23,7 @@ using Apache.Arrow.Adbc.Tests.Metadata; using Apache.Arrow.Adbc.Tests.Xunit; using Apache.Arrow.Ipc; using Xunit; +using Xunit.Abstractions; namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery { @@ -37,12 +38,14 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery public class DriverTests { BigQueryTestConfiguration _testConfiguration; + readonly ITestOutputHelper? outputHelper; - public DriverTests() + public DriverTests(ITestOutputHelper? outputHelper) { + this.outputHelper = outputHelper; + Skip.IfNot(Utils.CanExecuteTestConfig(BigQueryTestingUtils.BIGQUERY_TEST_CONFIG_VARIABLE)); _testConfiguration = Utils.LoadTestConfiguration<BigQueryTestConfiguration>(BigQueryTestingUtils.BIGQUERY_TEST_CONFIG_VARIABLE); - } /// <summary> @@ -98,6 +101,32 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.BigQuery } } + /// <summary> + /// Validates if the driver can call GetObjects. + /// </summary> + [SkippableFact, Order(3)] + public void CanGetObjectsAllCatalogs() + { + AdbcConnection adbcConnection = BigQueryTestingUtils.GetBigQueryAdbcConnection(_testConfiguration); + + IArrowArrayStream stream = adbcConnection.GetObjects( + depth: AdbcConnection.GetObjectsDepth.Catalogs, + catalogPattern: null, + dbSchemaPattern: null, + tableNamePattern: null, + tableTypes: BigQueryTableTypes.TableTypes, + columnNamePattern: null); + + RecordBatch recordBatch = stream.ReadNextRecordBatchAsync().Result; + + List<AdbcCatalog> catalogs = GetObjectsParser.ParseCatalog(recordBatch, null); + + foreach (AdbcCatalog ct in catalogs) + { + this.outputHelper?.WriteLine(ct.Name); + } + } + /// <summary> /// Validates if the driver can call GetObjects. /// </summary>