This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 18769ce  add blake2s and blake2b function (#1081)
18769ce is described below

commit 18769ce5735266c47e5cf42414ef8022bb818e1e
Author: Jiayu Liu <[email protected]>
AuthorDate: Sun Oct 10 18:58:15 2021 +0800

    add blake2s and blake2b function (#1081)
---
 datafusion/Cargo.toml                              |  3 ++-
 datafusion/src/physical_plan/crypto_expressions.rs | 24 ++++++++++++++++++----
 datafusion/tests/sql.rs                            | 12 +++++++++++
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 327fcd3..ea9ca21 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -39,7 +39,7 @@ path = "src/lib.rs"
 [features]
 default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
 simd = ["arrow/simd"]
-crypto_expressions = ["md-5", "sha2"]
+crypto_expressions = ["md-5", "sha2", "blake2"]
 regex_expressions = ["regex", "lazy_static"]
 unicode_expressions = ["unicode-segmentation"]
 # Used for testing ONLY: causes all values to hash to the same value (test for collisions)
@@ -64,6 +64,7 @@ tokio-stream = "0.1"
 log = "^0.4"
 md-5 = { version = "^0.9.1", optional = true }
 sha2 = { version = "^0.9.1", optional = true }
+blake2 = { version = "^0.9.2", optional = true }
 ordered-float = "2.0"
 unicode-segmentation = { version = "^1.7.1", optional = true }
 regex = { version = "^1.4.3", optional = true }
diff --git a/datafusion/src/physical_plan/crypto_expressions.rs b/datafusion/src/physical_plan/crypto_expressions.rs
index 8fa9f44..8c575bc 100644
--- a/datafusion/src/physical_plan/crypto_expressions.rs
+++ b/datafusion/src/physical_plan/crypto_expressions.rs
@@ -28,8 +28,9 @@ use arrow::{
     },
     datatypes::DataType,
 };
+use blake2::{Blake2b, Blake2s, Digest};
 use md5::Md5;
-use sha2::{Digest as SHA2Digest, Sha224, Sha256, Sha384, Sha512};
+use sha2::{Sha224, Sha256, Sha384, Sha512};
 use std::any::type_name;
 use std::fmt::Write;
 use std::sync::Arc;
@@ -48,6 +49,8 @@ enum DigestAlgorithm {
     Sha256,
     Sha384,
     Sha512,
+    Blake2s,
+    Blake2b,
 }
 
 fn digest_process(
@@ -112,6 +115,8 @@ impl DigestAlgorithm {
             Self::Sha256 => digest_to_scalar!(Sha256, value),
             Self::Sha384 => digest_to_scalar!(Sha384, value),
             Self::Sha512 => digest_to_scalar!(Sha512, value),
+            Self::Blake2b => digest_to_scalar!(Blake2b, value),
+            Self::Blake2s => digest_to_scalar!(Blake2s, value),
         })
     }
 
@@ -135,6 +140,8 @@ impl DigestAlgorithm {
             Self::Sha256 => digest_to_array!(Sha256, input_value),
             Self::Sha384 => digest_to_array!(Sha384, input_value),
             Self::Sha512 => digest_to_array!(Sha512, input_value),
+            Self::Blake2b => digest_to_array!(Blake2b, input_value),
+            Self::Blake2s => digest_to_array!(Blake2s, input_value),
         };
         Ok(ColumnarValue::Array(array))
     }
@@ -155,6 +162,8 @@ impl FromStr for DigestAlgorithm {
             "sha256" => Self::Sha256,
             "sha384" => Self::Sha384,
             "sha512" => Self::Sha512,
+            "blake2b" => Self::Blake2b,
+            "blake2s" => Self::Blake2s,
             _ => {
                 return Err(DataFusionError::Plan(format!(
                     "There is no built-in digest algorithm named {}",
@@ -237,24 +246,31 @@ define_digest_function!(
     Sha224,
     "computes sha224 hash digest of the given input"
 );
-
 define_digest_function!(
     sha256,
     Sha256,
     "computes sha256 hash digest of the given input"
 );
-
 define_digest_function!(
     sha384,
     Sha384,
     "computes sha384 hash digest of the given input"
 );
-
 define_digest_function!(
     sha512,
     Sha512,
     "computes sha512 hash digest of the given input"
 );
+define_digest_function!(
+    blake2b,
+    Blake2b,
+    "computes blake2b hash digest of the given input"
+);
+define_digest_function!(
+    blake2s,
+    Blake2s,
+    "computes blake2s hash digest of the given input"
+);
 
 /// Digest computes a binary hash of the given data, accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`].
 /// Second argument is the algorithm to use.
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 4ed07af..801451f 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -4050,6 +4050,18 @@ async fn test_crypto_expressions() -> Result<()> {
     test_expression!("digest('','sha512')", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
     test_expression!("sha512(NULL)", "NULL");
     test_expression!("digest(NULL,'sha512')", "NULL");
+    test_expression!("digest(NULL,'blake2s')", "NULL");
+    test_expression!("digest(NULL,'blake2b')", "NULL");
+    test_expression!("digest('','blake2b')", "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce");
+    test_expression!("digest('tom','blake2b')", "482499a18da10a18d8d35ab5eb4c635551ec5b8d3ff37c3e87a632caf6680fe31566417834b4732e26e0203d1cad4f5366cb7ab57d89694e4c1fda3e26af2c23");
+    test_expression!(
+        "digest('','blake2s')",
+        "69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9"
+    );
+    test_expression!(
+        "digest('tom','blake2s')",
+        "5fc3f2b3a07cade5023c3df566e4d697d3823ba1b72bfb3e84cf7e768b2e7529"
+    );
     Ok(())
 }
 

Reply via email to