This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit ef0994193758523aec0eb3398e016ec9b2bea7c6
Author: yagagagaga <zhangminkefromflyd...@gmail.com>
AuthorDate: Mon Oct 16 16:47:21 2023 +0800

    [Improvement](hive-udf)(doc) minimize hive-udf and add some docs. (#24786)
---
 docs/en/docs/ecosystem/hive-bitmap-udf.md    | 56 ++++++++++++++++++++-
 docs/zh-CN/docs/ecosystem/hive-bitmap-udf.md | 56 ++++++++++++++++++++-
 fe/hive-udf/pom.xml                          | 75 ++++++++++++++++++++--------
 3 files changed, 163 insertions(+), 24 deletions(-)

diff --git a/docs/en/docs/ecosystem/hive-bitmap-udf.md 
b/docs/en/docs/ecosystem/hive-bitmap-udf.md
index 6629b9d6a24..7bb93bc90ba 100644
--- a/docs/en/docs/ecosystem/hive-bitmap-udf.md
+++ b/docs/en/docs/ecosystem/hive-bitmap-udf.md
@@ -72,7 +72,7 @@ mvn package -Dmaven.test.skip=true
 --You can also just package the hive-udf module
 mvn package -pl hive-udf -am -Dmaven.test.skip=true
 ```
-After packaging and compiling, enter the hive-udf directory and there will be 
a target directory,There will be hive-udf-jar-with-dependencies.jar package
+After packaging and compiling, enter the hive-udf directory; it will contain 
a target directory, which holds the hive-udf.jar package.
 
 ```sql
 -- Load the Hive Bitmap Udf jar package (Upload the compiled hive-udf jar 
package to HDFS)
@@ -108,4 +108,58 @@ select k1,bitmap_union(uuid) from hive_bitmap_table group 
by k1
 
 ## Hive Bitmap import into Doris
 
+<version since="2.0.2">
+
+### Method 1:Catalog (recommended)
+
+</version>
+
+When a Hive table is created with the TEXT format, Hive stores Binary columns 
as base64-encoded strings. The binary data can therefore be read through 
Doris's Hive Catalog and converted directly into a Bitmap with the 
bitmap_from_base64 function.
+
+Here is a full example:
+
+1. Creating Hive Tables in Hive
+
+```sql
+CREATE TABLE IF NOT EXISTS `test`.`hive_bitmap_table`(
+    `k1`   int       COMMENT '',
+    `k2`   String    COMMENT '',
+    `k3`   String    COMMENT '',
+    `uuid` binary    COMMENT 'bitmap'
+) STORED AS TEXTFILE;
+```
+
+2. [Creating a Catalog in Doris](../lakehouse/multi-catalog/hive)
+
+```sql
+CREATE CATALOG hive PROPERTIES (
+    'type' = 'hms', -- catalog backed by the Hive Metastore addressed below
+    'hive.metastore.uris' = 'thrift://127.0.0.1:9083'
+);
+```
+
+3. Create Doris internal table
+
+```sql
+CREATE TABLE IF NOT EXISTS `test`.`doris_bitmap_table`(
+    `k1`   int                   COMMENT '',
+    `k2`   String                COMMENT '',
+    `k3`   String                COMMENT '',
+    `uuid` BITMAP  BITMAP_UNION  COMMENT 'bitmap'
+)
+AGGREGATE KEY(k1, k2, k3) -- rows sharing (k1, k2, k3) are merged via BITMAP_UNION
+DISTRIBUTED BY HASH(`k1`) BUCKETS 1 -- bucketing column must be a declared key column
+PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+);
+```
+
+4. Inserting data from Hive into Doris
+
+```sql
+INSERT INTO internal.test.doris_bitmap_table (k1, k2, k3, uuid) SELECT k1, k2, 
k3, bitmap_from_base64(uuid) FROM hive.test.hive_bitmap_table;
+```
+
+### Method 2:Spark Load
+
  see details: [Spark 
Load](../data-operate/import/import-way/spark-load-manual.md) -> Basic 
operation -> Create load(Example 3: when the upstream data source is hive 
binary type table)
diff --git a/docs/zh-CN/docs/ecosystem/hive-bitmap-udf.md 
b/docs/zh-CN/docs/ecosystem/hive-bitmap-udf.md
index 7f886352ed4..d10a9250283 100644
--- a/docs/zh-CN/docs/ecosystem/hive-bitmap-udf.md
+++ b/docs/zh-CN/docs/ecosystem/hive-bitmap-udf.md
@@ -74,7 +74,7 @@ mvn package -Dmaven.test.skip=true
 --也可以只打hive-udf module
 mvn package -pl hive-udf -am -Dmaven.test.skip=true
 ```
-打包编译完成进入hive-udf目录会有target目录,里面就会有打包完成的hive-udf-jar-with-dependencies.jar包
+打包编译完成进入hive-udf目录会有target目录,里面就会有打包完成的hive-udf.jar包
 
 ```sql
 
@@ -117,4 +117,58 @@ select k1,bitmap_union(uuid) from hive_bitmap_table group 
by k1
 
 ## Hive bitmap 导入 doris
 
+<version since="2.0.2">
+
+### 方法一:Catalog (推荐)
+
+</version>
+
+创建 Hive 表指定为 TEXT 格式,此时,对于 Binary 类型,Hive 会以 base64 编码的字符串形式保存,此时可以通过 Hive 
Catalog 的形式,直接将位图数据通过 bitmap_from_base64 函数插入到 Doris 内部。
+
+以下是一个完整的例子:
+
+1. 在 Hive 中创建 Hive 表
+
+```sql
+CREATE TABLE IF NOT EXISTS `test`.`hive_bitmap_table`(
+    `k1`   int       COMMENT '',
+    `k2`   String    COMMENT '',
+    `k3`   String    COMMENT '',
+    `uuid` binary    COMMENT 'bitmap'
+) STORED AS TEXTFILE;
+```
+
+2. [在 Doris 中创建 Catalog](../lakehouse/multi-catalog/hive)
+
+```sql
+CREATE CATALOG hive PROPERTIES (
+    'type' = 'hms', -- catalog backed by the Hive Metastore addressed below
+    'hive.metastore.uris' = 'thrift://127.0.0.1:9083'
+);
+```
+
+3. 创建 Doris 内表
+
+```sql
+CREATE TABLE IF NOT EXISTS `test`.`doris_bitmap_table`(
+    `k1`   int                   COMMENT '',
+    `k2`   String                COMMENT '',
+    `k3`   String                COMMENT '',
+    `uuid` BITMAP  BITMAP_UNION  COMMENT 'bitmap'
+)
+AGGREGATE KEY(k1, k2, k3) -- rows sharing (k1, k2, k3) are merged via BITMAP_UNION
+DISTRIBUTED BY HASH(`k1`) BUCKETS 1 -- bucketing column must be a declared key column
+PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1"
+);
+```
+
+4. 从 Hive 插入数据到 Doris 中
+
+```sql
+INSERT INTO internal.test.doris_bitmap_table (k1, k2, k3, uuid) SELECT k1, k2, 
k3, bitmap_from_base64(uuid) FROM hive.test.hive_bitmap_table;
+```
+
+### 方法二:Spark Load
+
  详见: [Spark Load](../data-operate/import/import-way/spark-load-manual.md) -> 
基本操作  -> 创建导入 (示例3:上游数据源是hive binary类型情况)
diff --git a/fe/hive-udf/pom.xml b/fe/hive-udf/pom.xml
index eb970b399f9..dbb620e6596 100644
--- a/fe/hive-udf/pom.xml
+++ b/fe/hive-udf/pom.xml
@@ -35,22 +35,37 @@ under the License.
     </properties>
     <dependencies>
         <dependency>
-            <groupId>org.apache.doris</groupId>
-            <artifactId>hive-catalog-shade</artifactId>
-        </dependency>
-        <!-- 
https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
-        <dependency>
-            <groupId>org.apache.httpcomponents</groupId>
-            <artifactId>httpclient</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.velocity</groupId>
-            <artifactId>velocity-engine-core</artifactId>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-exec</artifactId>
+            <version>${hive.version}</version>
+            <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>${project.groupId}</groupId>
             <artifactId>fe-common</artifactId>
             <version>${project.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.hive</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>commons-codec</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.aspectj</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
     </dependencies>
     <build>
@@ -74,23 +89,39 @@ under the License.
 
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-assembly-plugin</artifactId>
+                <artifactId>maven-shade-plugin</artifactId>
                 <configuration>
-                    <descriptorRefs>
-                        <descriptorRef>jar-with-dependencies</descriptorRef>
-                    </descriptorRefs>
-                    <archive>
-                        <manifest>
-                            <mainClass></mainClass>
-                        </manifest>
-                    </archive>
+                    <minimizeJar>true</minimizeJar>
+                    <relocations>
+                        <relocation>
+                            <pattern>org.joda.time</pattern>
+                            
<shadedPattern>shade.doris.org.joda.time</shadedPattern>
+                        </relocation>
+                        <relocation>
+                            <pattern>org.roaringbitmap</pattern>
+                            
<shadedPattern>shade.doris.org.roaringbitmap</shadedPattern>
+                        </relocation>
+                    </relocations>
+                    <filters>
+                        <filter>
+                            <artifact>org.apache.logging.log4j:*</artifact>
+                            <excludes>
+                                <exclude>**</exclude>
+                            </excludes>
+                        </filter>
+                        <filter>
+                            <artifact>org.awaitility:*</artifact>
+                            <excludes>
+                                <exclude>**</exclude>
+                            </excludes>
+                        </filter>
+                    </filters>
                 </configuration>
                 <executions>
                     <execution>
-                        <id>make-assembly</id>
                         <phase>package</phase>
                         <goals>
-                            <goal>single</goal>
+                            <goal>shade</goal>
                         </goals>
                     </execution>
                 </executions>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to