This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new bf071a7e007 branch-4.0:[fix](parquet)fix parquet write timestamp int96
type. (1/2). (#63779)
bf071a7e007 is described below
commit bf071a7e007cf641cf10f9c75a9909d77ba9d46b
Author: daidai <[email protected]>
AuthorDate: Thu May 28 14:40:19 2026 +0800
branch-4.0:[fix](parquet)fix parquet write timestamp int96 type. (1/2).
(#63779)
### What problem does this PR solve?
Problem Summary:
pick #61760
---
thirdparty/download-thirdparty.sh | 3 +
...arrow-17.0.0-force-write-int96-timestamps.patch | 97 ++++++++++++++++++++++
thirdparty/vars.sh | 2 +-
3 files changed, 101 insertions(+), 1 deletion(-)
diff --git a/thirdparty/download-thirdparty.sh
b/thirdparty/download-thirdparty.sh
index 89040c09d6a..d5350ac9a30 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -380,6 +380,9 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " ARROW " ]]; then
# std::string objects in RELRO, then crash while initializing them.
patch -p1
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-status-inline-static-fix.patch"
+ # apache-arrow-17.0.0-force-write-int96-timestamps.patch :
+ # Introducing the parameter that forces writing int96 timestamps
for compatibility build branch-4.0.
+ patch -p1
<"${TP_PATCH_DIR}/apache-arrow-17.0.0-force-write-int96-timestamps.patch"
touch "${PATCHED_MARK}"
fi
cd -
diff --git
a/thirdparty/patches/apache-arrow-17.0.0-force-write-int96-timestamps.patch
b/thirdparty/patches/apache-arrow-17.0.0-force-write-int96-timestamps.patch
new file mode 100644
index 00000000000..30ca13aa1c0
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-17.0.0-force-write-int96-timestamps.patch
@@ -0,0 +1,97 @@
+diff -ruN arrow-apache-arrow-17.0.0/cpp/src/parquet/arrow/schema.cc
arrow-apache-arrow-branch40-17.0.0/cpp/src/parquet/arrow/schema.cc
+--- arrow-apache-arrow-17.0.0/cpp/src/parquet/arrow/schema.cc 2024-07-11
16:57:21.000000000 +0800
++++ arrow-apache-arrow-branch40-17.0.0/cpp/src/parquet/arrow/schema.cc
2026-05-28 10:47:43.886820058 +0800
+@@ -178,7 +178,8 @@
+
+ // The user is explicitly asking for Impala int96 encoding, there is no
+ // logical type.
+- if (arrow_properties.support_deprecated_int96_timestamps()) {
++ if (arrow_properties.force_write_int96_timestamps() ||
++ arrow_properties.support_deprecated_int96_timestamps()) {
+ *physical_type = ParquetType::INT96;
+ return Status::OK();
+ }
+diff -ruN arrow-apache-arrow-17.0.0/cpp/src/parquet/properties.h
arrow-apache-arrow-branch40-17.0.0/cpp/src/parquet/properties.h
+--- arrow-apache-arrow-17.0.0/cpp/src/parquet/properties.h 2024-07-11
16:57:21.000000000 +0800
++++ arrow-apache-arrow-branch40-17.0.0/cpp/src/parquet/properties.h
2026-05-28 10:53:44.136083775 +0800
+@@ -965,6 +965,7 @@
+ public:
+ Builder()
+ : write_timestamps_as_int96_(false),
++ force_write_int96_timestamps_(false),
+ coerce_timestamps_enabled_(false),
+ coerce_timestamps_unit_(::arrow::TimeUnit::SECOND),
+ truncated_timestamps_allowed_(false),
+@@ -990,6 +991,21 @@
+ return this;
+ }
+
++ /// \brief Force writing legacy int96 timestamps.
++ ///
++ /// This bypasses unit-based guards and writes INT96 whenever timestamp
++ /// metadata is resolved.
++ Builder* enable_force_write_int96_timestamps() {
++ force_write_int96_timestamps_ = true;
++ return this;
++ }
++
++ /// \brief Disable forcing legacy int96 timestamps (default).
++ Builder* disable_force_write_int96_timestamps() {
++ force_write_int96_timestamps_ = false;
++ return this;
++ }
++
+ /// \brief Coerce all timestamps to the specified time unit.
+ /// \param unit time unit to truncate to.
+ /// For Parquet versions 1.0 and 2.4, nanoseconds are casted to
microseconds.
+@@ -1070,14 +1086,15 @@
+ /// Create the final properties.
+ std::shared_ptr<ArrowWriterProperties> build() {
+ return std::shared_ptr<ArrowWriterProperties>(new ArrowWriterProperties(
+- write_timestamps_as_int96_, coerce_timestamps_enabled_,
coerce_timestamps_unit_,
++ write_timestamps_as_int96_, force_write_int96_timestamps_,
++ coerce_timestamps_enabled_, coerce_timestamps_unit_,
+ truncated_timestamps_allowed_, store_schema_,
compliant_nested_types_,
+ engine_version_, use_threads_, executor_));
+ }
+
+ private:
+ bool write_timestamps_as_int96_;
+-
++ bool force_write_int96_timestamps_;
+ bool coerce_timestamps_enabled_;
+ ::arrow::TimeUnit::type coerce_timestamps_unit_;
+ bool truncated_timestamps_allowed_;
+@@ -1092,6 +1109,8 @@
+
+ bool support_deprecated_int96_timestamps() const { return
write_timestamps_as_int96_; }
+
++ bool force_write_int96_timestamps() const { return
force_write_int96_timestamps_; }
++
+ bool coerce_timestamps_enabled() const { return coerce_timestamps_enabled_;
}
+ ::arrow::TimeUnit::type coerce_timestamps_unit() const {
+ return coerce_timestamps_unit_;
+@@ -1123,6 +1142,7 @@
+
+ private:
+ explicit ArrowWriterProperties(bool write_nanos_as_int96,
++ bool force_write_int96_timestamps,
+ bool coerce_timestamps_enabled,
+ ::arrow::TimeUnit::type
coerce_timestamps_unit,
+ bool truncated_timestamps_allowed, bool
store_schema,
+@@ -1130,6 +1150,7 @@
+ EngineVersion engine_version, bool
use_threads,
+ ::arrow::internal::Executor* executor)
+ : write_timestamps_as_int96_(write_nanos_as_int96),
++ force_write_int96_timestamps_(force_write_int96_timestamps),
+ coerce_timestamps_enabled_(coerce_timestamps_enabled),
+ coerce_timestamps_unit_(coerce_timestamps_unit),
+ truncated_timestamps_allowed_(truncated_timestamps_allowed),
+@@ -1140,6 +1161,7 @@
+ executor_(executor) {}
+
+ const bool write_timestamps_as_int96_;
++ const bool force_write_int96_timestamps_;
+ const bool coerce_timestamps_enabled_;
+ const ::arrow::TimeUnit::type coerce_timestamps_unit_;
+ const bool truncated_timestamps_allowed_;
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index ac36a438055..7356cba4c11 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -134,7 +134,7 @@ BZIP_SOURCE=bzip2-1.0.8
BZIP_MD5SUM="67e051268d0c475ea773822f7500d0e5"
# lzo2
-LZO2_DOWNLOAD="https://fossies.org/linux/misc/lzo-2.10.tar.gz"
+LZO2_DOWNLOAD="https://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz"
LZO2_NAME=lzo-2.10.tar.gz
LZO2_SOURCE=lzo-2.10
LZO2_MD5SUM="39d3f3f9c55c87b1e5d6888e1420f4b5"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]