This is an automated email from the ASF dual-hosted git repository.

praveenbingo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 423d5e7  ARROW-9501: Add logic in timestampdiff() when end date is last day of a month
423d5e7 is described below

commit 423d5e71cfbe3159349b2852650ffa51a507a78c
Author: Sagnik Chakraborty <[email protected]>
AuthorDate: Mon Jul 20 16:33:32 2020 +0530

    ARROW-9501: Add logic in timestampdiff() when end date is last day of a month
    
    timestampdiff(month, startDate, endDate) returns the wrong result in Gandiva
    when the day of month of endDate is less than the day of month of startDate
    and endDate is the last day of its month. An additional month is counted
    only when the end day of month is greater than or equal to the start day of
    month, but this rule does not hold when the end date is the last day of its
    month.
    
    Case in point: for startDate = 2020-01-31 and endDate = 2020-02-29,
    timestampdiff() previously returned 0, but the correct result is 1.
    
    Closes #7782 from sagnikc-dremio/diff and squashes the following commits:
    
    665fa27a7 <Sagnik Chakraborty> ARROW-9501: Add logic in timestampdiff() when end date is last day of a month
    
    Authored-by: Sagnik Chakraborty <[email protected]>
    Signed-off-by: Praveen <[email protected]>
---
 .../gandiva/precompiled/timestamp_arithmetic.cc    | 33 +++++++++++-
 cpp/src/gandiva/tests/date_time_test.cc            | 58 ++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
index ca15cc7..cdf9139 100644
--- a/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
+++ b/cpp/src/gandiva/precompiled/timestamp_arithmetic.cc
@@ -17,6 +17,36 @@
 
 #include "./epoch_time_point.h"
 
+// Gregorian leap-year rule for the year number yy: every 4th year is a leap year,
+// except century years, which must also be divisible by 400.
+bool is_leap_year(int yy) {
+  const bool fourth_year = (yy % 4) == 0;
+  const bool century_year = (yy % 100) == 0;
+  const bool fourth_century = (yy % 400) == 0;
+  if (fourth_century) {
+    // e.g. 1600, 2000
+    return true;
+  }
+  // e.g. 2020 is a leap year; 1900 and 2019 are not
+  return fourth_year && !century_year;
+}
+
+// True when tp falls on the final calendar day of its month (Feb 29, or Feb 28 in a
+// non-leap year). TmMon() is used 0-based here (1 == February).
+bool is_last_day_of_month(const EpochTimePoint& tp) {
+  static const int kDaysInMonth[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+  const int month = tp.TmMon();
+  const int day = tp.TmMday();
+  if (month != 1) {
+    // leap years only affect February
+    return day == kDaysInMonth[month];
+  }
+  // NOTE(review): assumes TmYear() is struct-tm style (years since 1900) - confirm.
+  // Passing the raw offset misjudges century years (2000 -> 100 is "not leap").
+  const bool leap = is_leap_year(1900 + tp.TmYear());
+  return day == (leap ? 29 : 28);
+}
+
 extern "C" {
 
 #include <time.h>
@@ -66,7 +96,8 @@ extern "C" {
     if (end_tm.TmMday() < start_tm.TmMday()) {                                 
       \
       /* case b */                                                             
       \
       diff = MONTHS_TO_TIMEUNIT(months_diff - 1, N_MONTHS);                    
       \
-      return SIGN_ADJUST_DIFF(is_positive, diff);                              
       \
+      return SIGN_ADJUST_DIFF(is_positive, diff) +                             
       \
+             (is_last_day_of_month(end_tm) ? 1 : 0);                           
       \
     }                                                                          
       \
     gdv_int32 end_day_millis =                                                 
       \
         static_cast<gdv_int32>(end_tm.TmHour() * MILLIS_IN_HOUR +              
       \
diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc
index 11371b0..fdf2a72 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -426,6 +426,64 @@ TEST_F(TestProjector, TestTimestampDiff) {
   }
 }
 
+// timestampdiffMonth must count a whole month when the end date is a month-end.
+TEST_F(TestProjector, TestTimestampDiffMonth) {
+  auto f0 = field("f0", timestamp(arrow::TimeUnit::MILLI));
+  auto f1 = field("f1", timestamp(arrow::TimeUnit::MILLI));
+  auto schema = arrow::schema({f0, f1});
+
+  // output field: month difference between f0 (start) and f1 (end)
+  auto diff_months = field("ss", int32());
+
+  auto diff_months_expr =
+      TreeExprBuilder::MakeExpression("timestampdiffMonth", {f0, f1}, diff_months);
+
+  std::shared_ptr<Projector> projector;
+  auto status =
+      Projector::Make(schema, {diff_months_expr}, TestConfiguration(), &projector);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  time_t epoch = Epoch();
+
+  // Sample rows: starts on the 30th/31st, ends on or near a shorter month's last day
+  std::vector<int64_t> f0_data = {MillisSince(epoch, 2019, 1, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 1, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 1, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2019, 3, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 3, 30, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 5, 31, 0, 0, 0, 0)};
+  std::vector<int64_t> f1_data = {MillisSince(epoch, 2019, 2, 28, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 2, 28, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 2, 29, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2019, 4, 30, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 2, 29, 0, 0, 0, 0),
+                                  MillisSince(epoch, 2020, 9, 30, 0, 0, 0, 0)};
+  int64_t num_records = f0_data.size();
+  std::vector<bool> validity(num_records, true);
+
+  auto array0 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
+      arrow::timestamp(arrow::TimeUnit::MILLI), f0_data, validity);
+  auto array1 = MakeArrowTypeArray<arrow::TimestampType, int64_t>(
+      arrow::timestamp(arrow::TimeUnit::MILLI), f1_data, validity);
+
+  // expected output: only 2020-02-28 (leap year, not a month-end) yields 0
+  std::vector<ArrayPtr> exp_output;
+  exp_output.push_back(MakeArrowArrayInt32({1, 0, 1, 1, -1, 4}, validity));
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression; stop here on failure so outputs.at(i) below cannot throw
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  for (uint32_t i = 0; i < exp_output.size(); i++) {
+    EXPECT_ARROW_ARRAY_EQUALS(exp_output.at(i), outputs.at(i));
+  }
+}
+
 TEST_F(TestProjector, TestMonthsBetween) {
   auto f0 = field("f0", arrow::date64());
   auto f1 = field("f1", arrow::date64());

Reply via email to