This is an automated email from the ASF dual-hosted git repository.
diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git
The following commit(s) were added to refs/heads/master by this push:
new 5f676bb [improvement] change config default value for read optimization (#196)
5f676bb is described below
commit 5f676bb8f43b8cd5be03caa6b3b73244fdd37e5e
Author: gnehil <[email protected]>
AuthorDate: Fri Mar 29 16:07:10 2024 +0800
[improvement] change config default value for read optimization (#196)
---
.../doris/spark/cfg/ConfigurationOptions.java | 6 ++---
.../apache/doris/spark/serialization/RowBatch.java | 2 +-
.../apache/doris/spark/rest/TestRestService.java | 27 +++++++++++++++-------
3 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
index 6ac7467..a0fea83 100644
--- a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
+++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
@@ -47,17 +47,17 @@ public interface ConfigurationOptions {
int DORIS_REQUEST_RETRIES_DEFAULT = 3;
int DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT = 30 * 1000;
int DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT = 30 * 1000;
- int DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 3600;
+ int DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 6 * 60 * 60;
String DORIS_TABLET_SIZE = "doris.request.tablet.size";
- int DORIS_TABLET_SIZE_DEFAULT = Integer.MAX_VALUE;
+ int DORIS_TABLET_SIZE_DEFAULT = 1;
int DORIS_TABLET_SIZE_MIN = 1;
String DORIS_BATCH_SIZE = "doris.batch.size";
int DORIS_BATCH_SIZE_DEFAULT = 1024;
String DORIS_EXEC_MEM_LIMIT = "doris.exec.mem.limit";
- long DORIS_EXEC_MEM_LIMIT_DEFAULT = 2147483648L;
+ long DORIS_EXEC_MEM_LIMIT_DEFAULT = 8L * 1024 * 1024 * 1024;
String DORIS_VALUE_READER_CLASS = "doris.value.reader.class";
diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
index 8dbc4bf..7c28f76 100644
--- a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
+++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
@@ -119,7 +119,7 @@ public class RowBatch {
fieldVectors = root.getFieldVectors();
if (fieldVectors.size() > schema.size()) {
logger.error("Data schema size '{}' should not be bigger than arrow field size '{}'.",
- fieldVectors.size(), schema.size());
+ schema.size(), fieldVectors.size());
throw new DorisException("Load Doris data failed, schema size of fetch data is wrong.");
}
if (fieldVectors.isEmpty() || root.getRowCount() == 0) {
diff --git a/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java b/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
index c87d94b..d83d0bc 100644
--- a/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
+++ b/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
@@ -273,21 +273,32 @@ public class TestRestService {
String database = "d";
String table = "t";
- Set<Long> be1Tablet = new HashSet<>();
- be1Tablet.add(1L);
- be1Tablet.add(2L);
+ Set<Long> be1Tablet1 = new HashSet<>();
+ be1Tablet1.add(1L);
PartitionDefinition pd1 = new PartitionDefinition(
- database, table, settings, "be1", be1Tablet, opaquedQueryPlan);
+ database, table, settings, "be1", be1Tablet1, opaquedQueryPlan);
- Set<Long> be2Tablet = new HashSet<>();
- be2Tablet.add(3L);
- be2Tablet.add(4L);
+ Set<Long> be1Tablet2 = new HashSet<>();
+ be1Tablet2.add(2L);
PartitionDefinition pd2 = new PartitionDefinition(
- database, table, settings, "be2", be2Tablet, opaquedQueryPlan);
+ database, table, settings, "be1", be1Tablet2, opaquedQueryPlan);
+
+ Set<Long> be2Tablet1 = new HashSet<>();
+ be2Tablet1.add(3L);
+ PartitionDefinition pd3 = new PartitionDefinition(
+ database, table, settings, "be2", be2Tablet1, opaquedQueryPlan);
+
+ Set<Long> be2Tablet2 = new HashSet<>();
+ be2Tablet2.add(4L);
+ PartitionDefinition pd4 = new PartitionDefinition(
+ database, table, settings, "be2", be2Tablet2, opaquedQueryPlan);
+
List<PartitionDefinition> expected = new ArrayList<>();
expected.add(pd1);
expected.add(pd2);
+ expected.add(pd3);
+ expected.add(pd4);
Collections.sort(expected);
List<PartitionDefinition> actual = RestService.tabletsMapToPartition(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]