This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 18769ce add blake2s and blake2b function (#1081)
18769ce is described below
commit 18769ce5735266c47e5cf42414ef8022bb818e1e
Author: Jiayu Liu <[email protected]>
AuthorDate: Sun Oct 10 18:58:15 2021 +0800
add blake2s and blake2b function (#1081)
---
datafusion/Cargo.toml | 3 ++-
datafusion/src/physical_plan/crypto_expressions.rs | 24 ++++++++++++++++++----
datafusion/tests/sql.rs | 12 +++++++++++
3 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/datafusion/Cargo.toml b/datafusion/Cargo.toml
index 327fcd3..ea9ca21 100644
--- a/datafusion/Cargo.toml
+++ b/datafusion/Cargo.toml
@@ -39,7 +39,7 @@ path = "src/lib.rs"
[features]
default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
simd = ["arrow/simd"]
-crypto_expressions = ["md-5", "sha2"]
+crypto_expressions = ["md-5", "sha2", "blake2"]
regex_expressions = ["regex", "lazy_static"]
unicode_expressions = ["unicode-segmentation"]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
@@ -64,6 +64,7 @@ tokio-stream = "0.1"
log = "^0.4"
md-5 = { version = "^0.9.1", optional = true }
sha2 = { version = "^0.9.1", optional = true }
+blake2 = { version = "^0.9.2", optional = true }
ordered-float = "2.0"
unicode-segmentation = { version = "^1.7.1", optional = true }
regex = { version = "^1.4.3", optional = true }
diff --git a/datafusion/src/physical_plan/crypto_expressions.rs b/datafusion/src/physical_plan/crypto_expressions.rs
index 8fa9f44..8c575bc 100644
--- a/datafusion/src/physical_plan/crypto_expressions.rs
+++ b/datafusion/src/physical_plan/crypto_expressions.rs
@@ -28,8 +28,9 @@ use arrow::{
},
datatypes::DataType,
};
+use blake2::{Blake2b, Blake2s, Digest};
use md5::Md5;
-use sha2::{Digest as SHA2Digest, Sha224, Sha256, Sha384, Sha512};
+use sha2::{Sha224, Sha256, Sha384, Sha512};
use std::any::type_name;
use std::fmt::Write;
use std::sync::Arc;
@@ -48,6 +49,8 @@ enum DigestAlgorithm {
Sha256,
Sha384,
Sha512,
+ Blake2s,
+ Blake2b,
}
fn digest_process(
@@ -112,6 +115,8 @@ impl DigestAlgorithm {
Self::Sha256 => digest_to_scalar!(Sha256, value),
Self::Sha384 => digest_to_scalar!(Sha384, value),
Self::Sha512 => digest_to_scalar!(Sha512, value),
+ Self::Blake2b => digest_to_scalar!(Blake2b, value),
+ Self::Blake2s => digest_to_scalar!(Blake2s, value),
})
}
@@ -135,6 +140,8 @@ impl DigestAlgorithm {
Self::Sha256 => digest_to_array!(Sha256, input_value),
Self::Sha384 => digest_to_array!(Sha384, input_value),
Self::Sha512 => digest_to_array!(Sha512, input_value),
+ Self::Blake2b => digest_to_array!(Blake2b, input_value),
+ Self::Blake2s => digest_to_array!(Blake2s, input_value),
};
Ok(ColumnarValue::Array(array))
}
@@ -155,6 +162,8 @@ impl FromStr for DigestAlgorithm {
"sha256" => Self::Sha256,
"sha384" => Self::Sha384,
"sha512" => Self::Sha512,
+ "blake2b" => Self::Blake2b,
+ "blake2s" => Self::Blake2s,
_ => {
return Err(DataFusionError::Plan(format!(
"There is no built-in digest algorithm named {}",
@@ -237,24 +246,31 @@ define_digest_function!(
Sha224,
"computes sha224 hash digest of the given input"
);
-
define_digest_function!(
sha256,
Sha256,
"computes sha256 hash digest of the given input"
);
-
define_digest_function!(
sha384,
Sha384,
"computes sha384 hash digest of the given input"
);
-
define_digest_function!(
sha512,
Sha512,
"computes sha512 hash digest of the given input"
);
+define_digest_function!(
+ blake2b,
+ Blake2b,
+ "computes blake2b hash digest of the given input"
+);
+define_digest_function!(
+ blake2s,
+ Blake2s,
+ "computes blake2s hash digest of the given input"
+);
/// Digest computes a binary hash of the given data, accepts Utf8 or LargeUtf8 and returns a [`ColumnarValue`].
/// Second argument is the algorithm to use.
diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs
index 4ed07af..801451f 100644
--- a/datafusion/tests/sql.rs
+++ b/datafusion/tests/sql.rs
@@ -4050,6 +4050,18 @@ async fn test_crypto_expressions() -> Result<()> {
test_expression!("digest('','sha512')", "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e");
test_expression!("sha512(NULL)", "NULL");
test_expression!("digest(NULL,'sha512')", "NULL");
+ test_expression!("digest(NULL,'blake2s')", "NULL");
+ test_expression!("digest(NULL,'blake2b')", "NULL");
+ test_expression!("digest('','blake2b')", "786a02f742015903c6c6fd852552d272912f4740e15847618a86e217f71f5419d25e1031afee585313896444934eb04b903a685b1448b755d56f701afe9be2ce");
+ test_expression!("digest('tom','blake2b')", "482499a18da10a18d8d35ab5eb4c635551ec5b8d3ff37c3e87a632caf6680fe31566417834b4732e26e0203d1cad4f5366cb7ab57d89694e4c1fda3e26af2c23");
+ test_expression!(
+ "digest('','blake2s')",
+ "69217a3079908094e11121d042354a7c1f55b6482ca1a51e1b250dfd1ed0eef9"
+ );
+ test_expression!(
+ "digest('tom','blake2s')",
+ "5fc3f2b3a07cade5023c3df566e4d697d3823ba1b72bfb3e84cf7e768b2e7529"
+ );
Ok(())
}