pwrliang commented on code in PR #556:
URL: https://github.com/apache/sedona-db/pull/556#discussion_r2760801410


##########
c/sedona-libgpuspatial/libgpuspatial/include/gpuspatial/loader/parallel_wkb_loader.hpp:
##########
@@ -875,21 +915,107 @@ class ParallelWkbLoader {
     nums.shrink_to_fit(stream);
   }
 
-  size_t estimateTotalBytes(const ArrowArray* array, int64_t offset, int64_t length) {
-    ArrowError arrow_error;
-    if (ArrowArrayViewSetArray(&array_view_, array, &arrow_error) != NANOARROW_OK) {
-      throw std::runtime_error("ArrowArrayViewSetArray error " +
-                               std::string(arrow_error.message));
-    }
+  template <typename OFFSET_IT>
+  size_t estimateTotalBytes(OFFSET_IT begin, OFFSET_IT end) const {
     size_t total_bytes = 0;
-    for (int64_t i = 0; i < length; i++) {
-      if (!ArrowArrayViewIsNull(&array_view_, offset + i)) {
-        auto item = ArrowArrayViewGetBytesUnsafe(&array_view_, offset + i);
+    for (auto it = begin; it != end; ++it) {
+      auto offset = *it;
+      if (!ArrowArrayViewIsNull(array_view_.get(), offset)) {
+        auto item = ArrowArrayViewGetBytesUnsafe(array_view_.get(), offset);
         total_bytes += item.size_bytes - 1      // byte order
                        - 2 * sizeof(uint32_t);  // type + size
       }
     }
     return total_bytes;
   }
+
+  template <typename OFFSET_IT>
+  std::vector<uint32_t> assignBalancedWorks(OFFSET_IT begin, OFFSET_IT end,
+                                            uint32_t num_threads) const {
+    size_t total_bytes = 0;

Review Comment:
   I split it into two branches, one for arrays with nulls and one for arrays
   without, but I believe this offers little performance improvement, since the
   CPU's branch predictor already handles that null check very well.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to