This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 23cdc7edebd0e1ecd3c1f2669d423503fd2203e4 Author: Michael Smith <[email protected]> AuthorDate: Fri Jan 27 15:55:43 2023 -0800 IMPALA-11867: [DOCS] Document Ozone support Adds a topic documenting Apache Ozone support, and recommends using the ofs protocol. Change-Id: I724a40c086fe0466646e7e108645fd8dbaee5f1d Reviewed-on: http://gerrit.cloudera.org:8080/19448 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- docs/impala.ditamap | 1 + docs/impala_keydefs.ditamap | 6 ++ docs/shared/impala_common.xml | 11 ++++ docs/topics/impala_ozone.xml | 103 +++++++++++++++++++++++++++++++ docs/topics/impala_parquet_file_size.xml | 3 + docs/topics/impala_tables.xml | 8 +-- 6 files changed, 128 insertions(+), 4 deletions(-) diff --git a/docs/impala.ditamap b/docs/impala.ditamap index b8c3dad72..dff2672cd 100644 --- a/docs/impala.ditamap +++ b/docs/impala.ditamap @@ -335,6 +335,7 @@ under the License. <topicref href="topics/impala_s3.xml"/> <topicref rev="2.9.0" href="topics/impala_adls.xml"/> <topicref href="topics/impala_isilon.xml"/> + <topicref rev="4.2.0" href="topics/impala_ozone.xml"/> <topicref href="topics/impala_logging.xml"/> <topicref href="topics/impala_client.xml"> <topicref href="topics/impala_impala_shell.xml"> diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap index bedc449b6..737da1089 100644 --- a/docs/impala_keydefs.ditamap +++ b/docs/impala_keydefs.ditamap @@ -57,6 +57,10 @@ under the License. 
<topicmeta><linktext>the Apache Iceberg site</linktext></topicmeta> </keydef> + <keydef href="https://ozone.apache.org" scope="external" format="html" keys="upstream_ozone_site"> + <topicmeta><linktext>the Apache Ozone site</linktext></topicmeta> + </keydef> + <keydef href="https://hbase.apache.org/book.html#security" scope="external" format="html" keys="upstream_hbase_security_docs"> <topicmeta><linktext>the Security chapter in the Apache HBase documentation</linktext></topicmeta> </keydef> @@ -10525,6 +10529,7 @@ under the License. <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/> <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. --> + <keydef keys="impala42"><topicmeta><keywords><keyword>Impala 4.2</keyword></keywords></topicmeta></keydef> <keydef keys="impala41"><topicmeta><keywords><keyword>Impala 4.1</keyword></keywords></topicmeta></keydef> <keydef keys="impala40"><topicmeta><keywords><keyword>Impala 4.0</keyword></keywords></topicmeta></keydef> <keydef keys="impala34"><topicmeta><keywords><keyword>Impala 3.4</keyword></keywords></topicmeta></keydef> @@ -11059,6 +11064,7 @@ under the License. 
<keydef href="topics/impala_s3.xml" keys="s3"/> <keydef href="topics/impala_isilon.xml" keys="impala_isilon"/> + <keydef href="topics/impala_ozone.xml" keys="impala_ozone"/> <keydef href="topics/impala_logging.xml" keys="logging"/> <keydef href="topics/impala_logging.xml#logs_details" keys="logs_details"/> diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml index ee8d9fbe5..27eca96a7 100644 --- a/docs/shared/impala_common.xml +++ b/docs/shared/impala_common.xml @@ -3348,6 +3348,17 @@ flight_num: INT32 SNAPPY DO:83456393 FPO:83488603 SZ:10216514/11474301 <codeblock>isi hdfs settings modify --default-block-size=256MB</codeblock> </p> + <p id="ozone_blurb" rev="4.2.0"> + <b>Ozone considerations:</b> + </p> + + <p id="ozone_block_size_caveat" rev="4.2.0"> + Because Apache Ozone storage buckets use a global value for the block size rather than + a configurable value for each file, the <codeph>PARQUET_FILE_SIZE</codeph> query option + has no effect when Impala inserts data into a table or partition residing on Ozone + storage. + </p> + <p id="hbase_blurb"> <b>HBase considerations:</b> </p> diff --git a/docs/topics/impala_ozone.xml b/docs/topics/impala_ozone.xml new file mode 100644 index 000000000..f4db61794 --- /dev/null +++ b/docs/topics/impala_ozone.xml @@ -0,0 +1,103 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="4.2.0" id="impala_ozone"> + + <title>Using Impala with Apache Ozone Storage</title> + + <titlealts audience="PDF"> + <navtitle>Ozone Storage</navtitle> + </titlealts> + + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Ozone"/> + <data name="Category" value="Disk Storage"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="hidden">Ozone</indexterm> + You can use Impala to query data files that reside on Apache Ozone distributed storage, + rather than in HDFS. The combination of the Impala query engine and Apache Ozone storage + is certified on <keyword keyref="impala42"/> or higher. + </p> + + <p> + For more information on Ozone, see <xref keyref="upstream_ozone_site"/>. + </p> + + <p> + The typical use case for Impala and Ozone together is to use Ozone for the default + filesystem, replacing HDFS entirely. In this configuration, when you create a database, + table, or partition, the data always resides on Ozone storage and you do not need to + specify any special <codeph>LOCATION</codeph> attribute. If you do specify a + <codeph>LOCATION</codeph> attribute, its value refers to a path within the Ozone + filesystem. For example: + </p> + +<codeblock>-- If the default filesystem is Ozone, all Impala data resides there +-- and all Impala databases and tables are located there. 
+CREATE TABLE t1 (x INT, s STRING); + +-- You can specify LOCATION for database, table, or partition, +-- using values from the Ozone filesystem. +CREATE DATABASE d1 LOCATION '/some/path/on/ozone/server/d1.db'; +CREATE TABLE d1.t2 (a TINYINT, b BOOLEAN); +</codeblock> + + <p> + Impala can write to, delete, and rename data files and database, table, and partition + directories on Ozone storage. Therefore, Impala statements such as <codeph>CREATE + TABLE</codeph>, <codeph>DROP TABLE</codeph>, <codeph>CREATE DATABASE</codeph>, + <codeph>DROP DATABASE</codeph>, <codeph>ALTER TABLE</codeph>, and <codeph>INSERT</codeph> + work the same with Ozone storage as with HDFS. + </p> + + <p> + Ozone supports multiple protocols: <codeph>ofs</codeph>, <codeph>o3fs</codeph>, and + <codeph>s3a</codeph>. Impala supports reading <codeph>ofs</codeph> and <codeph>o3fs</codeph>. + Impala can also read <codeph>s3a</codeph> (see <xref href="impala_s3.xml#s3"/>). However, + <codeph>ofs</codeph> is the newer protocol, and the only one Impala supports as a default + filesystem. We recommend using it for <xref href="impala_ddl.xml#ddl"/> to avoid access + limitations, and for <xref href="impala_dml.xml#dml"/> and + <xref href="impala_select.xml#select"/> for performance. + </p> + + <p conref="../shared/impala_common.xml#common/ozone_block_size_caveat"/> + + <p> + Impala's spill-to-disk feature may be configured to use Ozone storage by specifying a full + URI (e.g. <codeph>ofs://host:port/volume/bucket/key</codeph>) for the spill location. See + <xref href="impala_disk_space.xml#disk_space"/> for details on configuring remote + spill-to-disk. 
+ </p> + +<!-- <p outputclass="toc inpage"/> --> + + </conbody> + +</concept> diff --git a/docs/topics/impala_parquet_file_size.xml b/docs/topics/impala_parquet_file_size.xml index 05e6c366e..0a824e37c 100644 --- a/docs/topics/impala_parquet_file_size.xml +++ b/docs/topics/impala_parquet_file_size.xml @@ -93,6 +93,9 @@ INSERT OVERWRITE parquet_table SELECT * FROM text_table; <p conref="../shared/impala_common.xml#common/isilon_blurb"/> <p conref="../shared/impala_common.xml#common/isilon_block_size_caveat"/> + <p conref="../shared/impala_common.xml#common/ozone_blurb"/> + <p conref="../shared/impala_common.xml#common/ozone_block_size_caveat"/> + <p conref="../shared/impala_common.xml#common/related_info"/> <p> diff --git a/docs/topics/impala_tables.xml b/docs/topics/impala_tables.xml index 978dd3c19..82f147cc0 100644 --- a/docs/topics/impala_tables.xml +++ b/docs/topics/impala_tables.xml @@ -73,10 +73,10 @@ under the License. </ul> <p rev="2.2.0"> - Impala tables can also represent data that is stored in HBase, or in the Amazon S3 filesystem (<keyword keyref="impala22_full"/> or higher), - or on Isilon storage devices (<keyword keyref="impala223_full"/> or higher). See <xref href="impala_hbase.xml#impala_hbase"/>, - <xref href="impala_s3.xml#s3"/>, and <xref href="impala_isilon.xml#impala_isilon"/> - for details about those special kinds of tables. + Impala tables can also represent data that is stored in HBase, in the Amazon S3 filesystem (<keyword keyref="impala22_full"/> or higher), + on Isilon storage devices (<keyword keyref="impala223_full"/> or higher), or in Apache Ozone (<keyword keyref="impala42_full"/> or higher). + See <xref href="impala_hbase.xml#impala_hbase"/>, <xref href="impala_s3.xml#s3"/>, <xref href="impala_isilon.xml#impala_isilon"/>, and + <xref href="impala_ozone.xml#impala_ozone"/> for details about those special kinds of tables. </p> <p conref="../shared/impala_common.xml#common/ignore_file_extensions"/>
