This is an automated email from the ASF dual-hosted git repository.

diwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-spark-connector.git


The following commit(s) were added to refs/heads/master by this push:
     new 5f676bb  [improvement] change config default value for read optimization (#196)
5f676bb is described below

commit 5f676bb8f43b8cd5be03caa6b3b73244fdd37e5e
Author: gnehil <[email protected]>
AuthorDate: Fri Mar 29 16:07:10 2024 +0800

    [improvement] change config default value for read optimization (#196)
---
 .../doris/spark/cfg/ConfigurationOptions.java      |  6 ++---
 .../apache/doris/spark/serialization/RowBatch.java |  2 +-
 .../apache/doris/spark/rest/TestRestService.java   | 27 +++++++++++++++-------
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
index 6ac7467..a0fea83 100644
--- a/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
+++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/cfg/ConfigurationOptions.java
@@ -47,17 +47,17 @@ public interface ConfigurationOptions {
     int DORIS_REQUEST_RETRIES_DEFAULT = 3;
     int DORIS_REQUEST_CONNECT_TIMEOUT_MS_DEFAULT = 30 * 1000;
     int DORIS_REQUEST_READ_TIMEOUT_MS_DEFAULT = 30 * 1000;
-    int DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 3600;
+    int DORIS_REQUEST_QUERY_TIMEOUT_S_DEFAULT = 6 * 60 * 60;
 
     String DORIS_TABLET_SIZE = "doris.request.tablet.size";
-    int DORIS_TABLET_SIZE_DEFAULT = Integer.MAX_VALUE;
+    int DORIS_TABLET_SIZE_DEFAULT = 1;
     int DORIS_TABLET_SIZE_MIN = 1;
 
     String DORIS_BATCH_SIZE = "doris.batch.size";
     int DORIS_BATCH_SIZE_DEFAULT = 1024;
 
     String DORIS_EXEC_MEM_LIMIT = "doris.exec.mem.limit";
-    long DORIS_EXEC_MEM_LIMIT_DEFAULT = 2147483648L;
+    long DORIS_EXEC_MEM_LIMIT_DEFAULT = 8L * 1024 * 1024 * 1024;
 
     String DORIS_VALUE_READER_CLASS = "doris.value.reader.class";
 
diff --git a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
index 8dbc4bf..7c28f76 100644
--- a/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
+++ b/spark-doris-connector/src/main/java/org/apache/doris/spark/serialization/RowBatch.java
@@ -119,7 +119,7 @@ public class RowBatch {
                 fieldVectors = root.getFieldVectors();
                 if (fieldVectors.size() > schema.size()) {
                 logger.error("Data schema size '{}' should not be bigger than arrow field size '{}'.",
-                            fieldVectors.size(), schema.size());
+                            schema.size(), fieldVectors.size());
                     throw new DorisException("Load Doris data failed, schema size of fetch data is wrong.");
                 }
                 if (fieldVectors.isEmpty() || root.getRowCount() == 0) {
diff --git a/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java b/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
index c87d94b..d83d0bc 100644
--- a/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
+++ b/spark-doris-connector/src/test/java/org/apache/doris/spark/rest/TestRestService.java
@@ -273,21 +273,32 @@ public class TestRestService {
         String database = "d";
         String table = "t";
 
-        Set<Long> be1Tablet = new HashSet<>();
-        be1Tablet.add(1L);
-        be1Tablet.add(2L);
+        Set<Long> be1Tablet1 = new HashSet<>();
+        be1Tablet1.add(1L);
         PartitionDefinition pd1 = new PartitionDefinition(
-                database, table, settings, "be1", be1Tablet, opaquedQueryPlan);
+                database, table, settings, "be1", be1Tablet1, opaquedQueryPlan);
 
-        Set<Long> be2Tablet = new HashSet<>();
-        be2Tablet.add(3L);
-        be2Tablet.add(4L);
+        Set<Long> be1Tablet2 = new HashSet<>();
+        be1Tablet2.add(2L);
         PartitionDefinition pd2 = new PartitionDefinition(
-                database, table, settings, "be2", be2Tablet, opaquedQueryPlan);
+                database, table, settings, "be1", be1Tablet2, opaquedQueryPlan);
+
+        Set<Long> be2Tablet1 = new HashSet<>();
+        be2Tablet1.add(3L);
+        PartitionDefinition pd3 = new PartitionDefinition(
+                database, table, settings, "be2", be2Tablet1, opaquedQueryPlan);
+
+        Set<Long> be2Tablet2 = new HashSet<>();
+        be2Tablet2.add(4L);
+        PartitionDefinition pd4 = new PartitionDefinition(
+                database, table, settings, "be2", be2Tablet2, opaquedQueryPlan);
+
 
         List<PartitionDefinition> expected = new ArrayList<>();
         expected.add(pd1);
         expected.add(pd2);
+        expected.add(pd3);
+        expected.add(pd4);
         Collections.sort(expected);
 
         List<PartitionDefinition> actual = RestService.tabletsMapToPartition(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to