This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 23cdc7edebd0e1ecd3c1f2669d423503fd2203e4
Author: Michael Smith <[email protected]>
AuthorDate: Fri Jan 27 15:55:43 2023 -0800

    IMPALA-11867: [DOCS] Document Ozone support
    
    Adds a topic documenting Apache Ozone support, and recommends using the
    ofs protocol.
    
    Change-Id: I724a40c086fe0466646e7e108645fd8dbaee5f1d
    Reviewed-on: http://gerrit.cloudera.org:8080/19448
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 docs/impala.ditamap                      |   1 +
 docs/impala_keydefs.ditamap              |   6 ++
 docs/shared/impala_common.xml            |  11 ++++
 docs/topics/impala_ozone.xml             | 103 +++++++++++++++++++++++++++++++
 docs/topics/impala_parquet_file_size.xml |   3 +
 docs/topics/impala_tables.xml            |   8 +--
 6 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index b8c3dad72..dff2672cd 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -335,6 +335,7 @@ under the License.
   <topicref href="topics/impala_s3.xml"/>
   <topicref rev="2.9.0" href="topics/impala_adls.xml"/>
   <topicref href="topics/impala_isilon.xml"/>
+  <topicref rev="4.2.0" href="topics/impala_ozone.xml"/>
   <topicref href="topics/impala_logging.xml"/>
   <topicref href="topics/impala_client.xml">
     <topicref href="topics/impala_impala_shell.xml">
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index bedc449b6..737da1089 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -57,6 +57,10 @@ under the License.
     <topicmeta><linktext>the Apache Iceberg site</linktext></topicmeta>
   </keydef>
 
+  <keydef href="https://ozone.apache.org" scope="external" format="html" keys="upstream_ozone_site">
+    <topicmeta><linktext>the Apache Ozone site</linktext></topicmeta>
+  </keydef>
+
   <keydef href="https://hbase.apache.org/book.html#security" scope="external" format="html" keys="upstream_hbase_security_docs">
     <topicmeta><linktext>the Security chapter in the Apache HBase 
documentation</linktext></topicmeta>
   </keydef>
@@ -10525,6 +10529,7 @@ under the License.
   <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
 
 <!-- Short form of mapping from Impala release to vendor-specific releases, 
for use in headings. -->
+  <keydef keys="impala42"><topicmeta><keywords><keyword>Impala 
4.2</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala41"><topicmeta><keywords><keyword>Impala 
4.1</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala40"><topicmeta><keywords><keyword>Impala 
4.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala34"><topicmeta><keywords><keyword>Impala 
3.4</keyword></keywords></topicmeta></keydef>
@@ -11059,6 +11064,7 @@ under the License.
 
   <keydef href="topics/impala_s3.xml" keys="s3"/>
   <keydef href="topics/impala_isilon.xml" keys="impala_isilon"/>
+  <keydef href="topics/impala_ozone.xml" keys="impala_ozone"/>
 
   <keydef href="topics/impala_logging.xml" keys="logging"/>
   <keydef href="topics/impala_logging.xml#logs_details" keys="logs_details"/>
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index ee8d9fbe5..27eca96a7 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -3348,6 +3348,17 @@ flight_num:           INT32 SNAPPY DO:83456393 
FPO:83488603 SZ:10216514/11474301
 <codeblock>isi hdfs settings modify --default-block-size=256MB</codeblock>
       </p>
 
+      <p id="ozone_blurb" rev="4.2.0">
+        <b>Ozone considerations:</b>
+      </p>
+
+      <p id="ozone_block_size_caveat" rev="4.2.0">
+        Because Apache Ozone storage buckets use a global value for the block 
size rather than
+        a configurable value for each file, the 
<codeph>PARQUET_FILE_SIZE</codeph> query option
+        has no effect when Impala inserts data into a table or partition 
residing on Ozone
+        storage.
+      </p>
+
       <p id="hbase_blurb">
         <b>HBase considerations:</b>
       </p>
diff --git a/docs/topics/impala_ozone.xml b/docs/topics/impala_ozone.xml
new file mode 100644
index 000000000..f4db61794
--- /dev/null
+++ b/docs/topics/impala_ozone.xml
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept rev="4.2.0" id="impala_ozone">
+
+  <title>Using Impala with Apache Ozone Storage</title>
+
+  <titlealts audience="PDF">
+    <navtitle>Ozone Storage</navtitle>
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Ozone"/>
+      <data name="Category" value="Disk Storage"/>
+      <data name="Category" value="Administrators"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      <indexterm audience="hidden">Ozone</indexterm>
+      You can use Impala to query data files that reside on Apache Ozone 
distributed storage,
+      rather than in HDFS. The combination of the Impala query engine and 
Apache Ozone storage
+      is certified on <keyword keyref="impala42"/> or higher.
+    </p>
+
+    <p>
+      For more information on Ozone, see <xref keyref="upstream_ozone_site"/>.
+    </p>
+
+    <p>
+      The typical use case for Impala and Ozone together is to use Ozone for 
the default
+      filesystem, replacing HDFS entirely. In this configuration, when you 
create a database,
+      table, or partition, the data always resides on Ozone storage and you do 
not need to
+      specify any special <codeph>LOCATION</codeph> attribute. If you do 
specify a
+      <codeph>LOCATION</codeph> attribute, its value refers to a path within 
the Ozone
+      filesystem. For example:
+    </p>
+
+<codeblock>-- If the default filesystem is Ozone, all Impala data resides there
+-- and all Impala databases and tables are located there.
+CREATE TABLE t1 (x INT, s STRING);
+
+-- You can specify LOCATION for database, table, or partition,
+-- using values from the Ozone filesystem.
+CREATE DATABASE d1 LOCATION '/some/path/on/ozone/server/d1.db';
+CREATE TABLE d1.t2 (a TINYINT, b BOOLEAN);
+</codeblock>
+
+    <p>
+      Impala can write to, delete, and rename data files and database, table, 
and partition
+      directories on Ozone storage. Therefore, Impala statements such as 
<codeph>CREATE
+      TABLE</codeph>, <codeph>DROP TABLE</codeph>, <codeph>CREATE 
DATABASE</codeph>,
+      <codeph>DROP DATABASE</codeph>, <codeph>ALTER TABLE</codeph>, and 
<codeph>INSERT</codeph>
+      work the same with Ozone storage as with HDFS.
+    </p>
+
+    <p>
+      Ozone supports multiple protocols: <codeph>ofs</codeph>, 
<codeph>o3fs</codeph>, and
+      <codeph>s3a</codeph>. Impala supports reading <codeph>ofs</codeph> and 
<codeph>o3fs</codeph>.
+      Impala can also read <codeph>s3a</codeph> (see <xref href="impala_s3.xml#s3"/>). However,
+      <codeph>ofs</codeph> is the newer Ozone protocol, and the only one Impala supports as a default
+      filesystem. We recommend using it for <xref href="impala_ddl.xml#ddl"/> 
to avoid access
+      limitations, and for <xref href="impala_dml.xml#dml"/> and
+      <xref href="impala_select.xml#select"/> for performance.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/ozone_block_size_caveat"/>
+
+    <p>
+      Impala's spill-to-disk feature may be configured to use Ozone storage by 
specifying a full
+      URI (e.g. <codeph>ofs://host:port/volume/bucket/key</codeph>) for the 
spill location. See
+      <xref href="impala_disk_space.xml#disk_space"/> for details on 
configuring remote
+      spill-to-disk.
+    </p>
+
+<!-- <p outputclass="toc inpage"/> -->
+
+  </conbody>
+
+</concept>
diff --git a/docs/topics/impala_parquet_file_size.xml 
b/docs/topics/impala_parquet_file_size.xml
index 05e6c366e..0a824e37c 100644
--- a/docs/topics/impala_parquet_file_size.xml
+++ b/docs/topics/impala_parquet_file_size.xml
@@ -93,6 +93,9 @@ INSERT OVERWRITE parquet_table SELECT * FROM text_table;
     <p conref="../shared/impala_common.xml#common/isilon_blurb"/>
     <p conref="../shared/impala_common.xml#common/isilon_block_size_caveat"/>
 
+    <p conref="../shared/impala_common.xml#common/ozone_blurb"/>
+    <p conref="../shared/impala_common.xml#common/ozone_block_size_caveat"/>
+
     <p conref="../shared/impala_common.xml#common/related_info"/>
 
     <p>
diff --git a/docs/topics/impala_tables.xml b/docs/topics/impala_tables.xml
index 978dd3c19..82f147cc0 100644
--- a/docs/topics/impala_tables.xml
+++ b/docs/topics/impala_tables.xml
@@ -73,10 +73,10 @@ under the License.
     </ul>
 
     <p rev="2.2.0">
-      Impala tables can also represent data that is stored in HBase, or in the 
Amazon S3 filesystem (<keyword keyref="impala22_full"/> or higher),
-      or on Isilon storage devices (<keyword keyref="impala223_full"/> or 
higher).  See <xref href="impala_hbase.xml#impala_hbase"/>,
-      <xref href="impala_s3.xml#s3"/>, and <xref 
href="impala_isilon.xml#impala_isilon"/>
-      for details about those special kinds of tables.
+      Impala tables can also represent data that is stored in HBase, in the 
Amazon S3 filesystem (<keyword keyref="impala22_full"/> or higher),
+      on Isilon storage devices (<keyword keyref="impala223_full"/> or higher), or in Apache Ozone (<keyword keyref="impala42"/> or higher).
+      See <xref href="impala_hbase.xml#impala_hbase"/>, <xref 
href="impala_s3.xml#s3"/>, <xref href="impala_isilon.xml#impala_isilon"/>, and
+      <xref href="impala_ozone.xml#impala_ozone"/> for details about those 
special kinds of tables.
     </p>
 
     <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/>

Reply via email to