This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/asf-site by this push:
new d1e1ca288 BLOG: Add Codegen cache for low latency queries
d1e1ca288 is described below
commit d1e1ca28803c07eaff0314f635c5e26835e95c53
Author: Michael Smith <[email protected]>
AuthorDate: Tue Oct 29 14:47:06 2024 -0700
BLOG: Add Codegen cache for low latency queries
Adds blog post "Codegen cache for low latency queries", also posted to
https://medium.com/engineering-cloudera/codegen-cache-for-low-latency-queries-47d5fd947fcf
Implements multiple author support based on
https://github.com/getnikola/nikola/pull/3456.
Also fixes up inconsistent title "Impalas living on Iceberg" to match
slides.
Change-Id: I38cebcccdd61b5f10bdb1dd6fecf5584646290e4
Reviewed-on: http://gerrit.cloudera.org:8080/21990
Reviewed-by: Riza Suminto <[email protected]>
Reviewed-by: Yida Wu <[email protected]>
Tested-by: Michael Smith <[email protected]>
---
blog/2024/index.html | 5 +-
blog/archive.html | 2 +-
blog/assets/css/additional_styles.css | 4 +
.../{gabor-kaszab => abhishek-rawat}/index.html | 8 +-
.../{gabor-kaszab => david-rorke}/index.html | 8 +-
blog/authors/gabor-kaszab/index.html | 2 +-
blog/authors/index.html | 6 +-
.../{gabor-kaszab => michael-smith}/index.html | 8 +-
blog/authors/{gabor-kaszab => yida-wu}/index.html | 8 +-
blog/categories/cat_blogs/index.html | 3 +
blog/categories/cat_talks/index.html | 2 +-
blog/categories/ccna24/index.html | 2 +-
blog/categories/{cat_posts => codegen}/index.html | 8 +-
blog/categories/index.html | 1 +
blog/images/codegen-cache-perf.png | Bin 0 -> 101425 bytes
blog/images/codegen-cache-perf.thumbnail.png | Bin 0 -> 25470 bytes
blog/images/query-exec.png | Bin 0 -> 31753 bytes
blog/images/query-exec.thumbnail.png | Bin 0 -> 29551 bytes
blog/{archive.html => index-1.html} | 62 ++++--
blog/index.html | 161 +++++++++++----
.../index.html | 225 +++++++++++++++++++++
.../healing-iceberg-tables-with-impala/index.html | 4 +
blog/posts/impalas-living-on-iceberg/index.html | 4 +-
.../index.html | 4 +-
blog/sitemap.xml | 84 +++++---
blog/sitemapindex.xml | 2 +-
nikola_site_generator/conf.py | 4 +
.../images/codegen-cache-perf.png | Bin 0 -> 111887 bytes
nikola_site_generator/images/query-exec.png | Bin 0 -> 32029 bytes
.../posts/codegen-cache-for-low-latency-queries.md | 142 +++++++++++++
.../posts/impalas-living-on-iceberg.md | 2 +-
.../impala-theme/assets/css/additional_styles.css | 4 +
.../themes/impala-theme/templates/index.tmpl | 6 +-
.../themes/impala-theme/templates/post_header.tmpl | 6 +-
34 files changed, 658 insertions(+), 119 deletions(-)
diff --git a/blog/2024/index.html b/blog/2024/index.html
index 96d8d4f76..31e6752a3 100644
--- a/blog/2024/index.html
+++ b/blog/2024/index.html
@@ -57,13 +57,16 @@
<main id="content"><article class="listpage"><header><h1>Posts for
year 2024</h1>
</header><ul class="postlist">
<li>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a>
+</li>
+ <li>
<time class="listdate" datetime="2024-10-10T10:55:00-06:00" title="2024-10-10
10:55">2024-10-10 10:55</time><a
href="../posts/healing-iceberg-tables-with-impala/" class="listtitle">Healing
Iceberg Tables with Impala</a>
</li>
<li>
<time class="listdate" datetime="2024-10-08T14:50:00-06:00" title="2024-10-08
14:50">2024-10-08 14:50</time><a
href="../posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/"
class="listtitle">Intelligent Utilization Aware Autoscaling for Impala Virtual
Compute Clusters</a>
</li>
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../posts/impalas-living-on-iceberg/"
class="listtitle">Impalas living on Iceberg</a>
</li>
<li>
<time class="listdate" datetime="2024-07-27T07:09:35-07:00" title="2024-07-27
07:09">2024-07-27 07:09</time><a
href="../posts/this-impala-not-only-reads-but-modifies-and-optimizes-iceberg-tables/"
class="listtitle">This Impala not only reads, but modifies and optimizes
Iceberg tables</a>
diff --git a/blog/archive.html b/blog/archive.html
index 64e1c7cf4..ed8c2da6c 100644
--- a/blog/archive.html
+++ b/blog/archive.html
@@ -58,7 +58,7 @@
</header><ul class="postlist">
<li>
<a href="2024/">2024</a>
- (6)
+ (7)
</li>
<li>
<a href="2017/">2017</a>
diff --git a/blog/assets/css/additional_styles.css
b/blog/assets/css/additional_styles.css
index e7ae1cc58..6eb731774 100644
--- a/blog/assets/css/additional_styles.css
+++ b/blog/assets/css/additional_styles.css
@@ -22,6 +22,10 @@ body {
padding-bottom: 40px;
}
+.byline a:not(:last-child):after {
+ content: ",";
+}
+
/* Custom container */
.container-narrow {
margin: 0 auto;
diff --git a/blog/authors/gabor-kaszab/index.html
b/blog/authors/abhishek-rawat/index.html
similarity index 92%
copy from blog/authors/gabor-kaszab/index.html
copy to blog/authors/abhishek-rawat/index.html
index cee9d5112..9c1fa88e4 100644
--- a/blog/authors/gabor-kaszab/index.html
+++ b/blog/authors/abhishek-rawat/index.html
@@ -5,12 +5,12 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Posts by Gabor Kaszab | Apache Impala</title>
+<title>Posts by Abhishek Rawat | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical"
href="https://impala.apache.org/blog/authors/gabor-kaszab/">
+<link rel="canonical"
href="https://impala.apache.org/blog/authors/abhishek-rawat/">
</head>
<body id="index" class="home">
<div class="container">
@@ -53,13 +53,13 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="authorpage"><header><h1>Posts by
Gabor Kaszab</h1>
+ <main id="content"><article class="authorpage"><header><h1>Posts by
Abhishek Rawat</h1>
<div class="metadata">
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/authors/gabor-kaszab/index.html
b/blog/authors/david-rorke/index.html
similarity index 92%
copy from blog/authors/gabor-kaszab/index.html
copy to blog/authors/david-rorke/index.html
index cee9d5112..b1d4fad1e 100644
--- a/blog/authors/gabor-kaszab/index.html
+++ b/blog/authors/david-rorke/index.html
@@ -5,12 +5,12 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Posts by Gabor Kaszab | Apache Impala</title>
+<title>Posts by David Rorke | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical"
href="https://impala.apache.org/blog/authors/gabor-kaszab/">
+<link rel="canonical"
href="https://impala.apache.org/blog/authors/david-rorke/">
</head>
<body id="index" class="home">
<div class="container">
@@ -53,13 +53,13 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="authorpage"><header><h1>Posts by
Gabor Kaszab</h1>
+ <main id="content"><article class="authorpage"><header><h1>Posts by
David Rorke</h1>
<div class="metadata">
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/authors/gabor-kaszab/index.html
b/blog/authors/gabor-kaszab/index.html
index cee9d5112..e2d7780f0 100644
--- a/blog/authors/gabor-kaszab/index.html
+++ b/blog/authors/gabor-kaszab/index.html
@@ -59,7 +59,7 @@
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impalas living on Iceberg</a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/authors/index.html b/blog/authors/index.html
index 3b5a6bd52..ee1f9b179 100644
--- a/blog/authors/index.html
+++ b/blog/authors/index.html
@@ -59,10 +59,14 @@
</div>
<ul class="postlist">
-<li><a class="reference listtitle" href="gabor-kaszab/">Gabor Kaszab</a></li>
+<li><a class="reference listtitle" href="abhishek-rawat/">Abhishek
Rawat</a></li>
+ <li><a class="reference listtitle" href="david-rorke/">David
Rorke</a></li>
+ <li><a class="reference listtitle" href="gabor-kaszab/">Gabor
Kaszab</a></li>
<li><a class="reference listtitle" href="impala-dev/">Impala
Dev</a></li>
+ <li><a class="reference listtitle"
href="michael-smith/">Michael Smith</a></li>
<li><a class="reference listtitle"
href="noemi-pap-takacs/">Noémi Pap-Takács</a></li>
<li><a class="reference listtitle" href="riza-suminto/">Riza
Suminto</a></li>
+ <li><a class="reference listtitle" href="yida-wu/">Yida
Wu</a></li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
<div class="navbar-inner">
diff --git a/blog/authors/gabor-kaszab/index.html
b/blog/authors/michael-smith/index.html
similarity index 92%
copy from blog/authors/gabor-kaszab/index.html
copy to blog/authors/michael-smith/index.html
index cee9d5112..0f380f7a3 100644
--- a/blog/authors/gabor-kaszab/index.html
+++ b/blog/authors/michael-smith/index.html
@@ -5,12 +5,12 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Posts by Gabor Kaszab | Apache Impala</title>
+<title>Posts by Michael Smith | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical"
href="https://impala.apache.org/blog/authors/gabor-kaszab/">
+<link rel="canonical"
href="https://impala.apache.org/blog/authors/michael-smith/">
</head>
<body id="index" class="home">
<div class="container">
@@ -53,13 +53,13 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="authorpage"><header><h1>Posts by
Gabor Kaszab</h1>
+ <main id="content"><article class="authorpage"><header><h1>Posts by
Michael Smith</h1>
<div class="metadata">
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/authors/gabor-kaszab/index.html
b/blog/authors/yida-wu/index.html
similarity index 92%
copy from blog/authors/gabor-kaszab/index.html
copy to blog/authors/yida-wu/index.html
index cee9d5112..af3433bf1 100644
--- a/blog/authors/gabor-kaszab/index.html
+++ b/blog/authors/yida-wu/index.html
@@ -5,12 +5,12 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Posts by Gabor Kaszab | Apache Impala</title>
+<title>Posts by Yida Wu | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical"
href="https://impala.apache.org/blog/authors/gabor-kaszab/">
+<link rel="canonical" href="https://impala.apache.org/blog/authors/yida-wu/">
</head>
<body id="index" class="home">
<div class="container">
@@ -53,13 +53,13 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="authorpage"><header><h1>Posts by
Gabor Kaszab</h1>
+ <main id="content"><article class="authorpage"><header><h1>Posts by
Yida Wu</h1>
<div class="metadata">
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/categories/cat_blogs/index.html
b/blog/categories/cat_blogs/index.html
index 59f8506d0..8ba9fb860 100644
--- a/blog/categories/cat_blogs/index.html
+++ b/blog/categories/cat_blogs/index.html
@@ -60,6 +60,9 @@
</header><ul class="postlist">
<li>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a><a></a>
+</li>
+ <li>
<time class="listdate" datetime="2017-01-03T15:45:20-08:00" title="2017-01-03
15:45">2017-01-03 15:45</time><a href="../../posts/impala-blog-coming-soon/"
class="listtitle">Impala Blog Coming Soon</a><a></a>
</li>
</ul></article></main><footer id="footer"><p> </p>
diff --git a/blog/categories/cat_talks/index.html
b/blog/categories/cat_talks/index.html
index 0c9869ba5..fb1ecdac1 100644
--- a/blog/categories/cat_talks/index.html
+++ b/blog/categories/cat_talks/index.html
@@ -66,7 +66,7 @@
<time class="listdate" datetime="2024-10-08T14:50:00-06:00" title="2024-10-08
14:50">2024-10-08 14:50</time><a
href="../../posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/"
class="listtitle">Intelligent Utilization Aware Autoscaling for Impala Virtual
Compute Clusters</a><a></a>
</li>
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a><a></a>
+<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impalas living on Iceberg</a><a></a>
</li>
<li>
<time class="listdate" datetime="2024-07-27T07:09:35-07:00" title="2024-07-27
07:09">2024-07-27 07:09</time><a
href="../../posts/this-impala-not-only-reads-but-modifies-and-optimizes-iceberg-tables/"
class="listtitle">This Impala not only reads, but modifies and optimizes
Iceberg tables</a><a></a>
diff --git a/blog/categories/ccna24/index.html
b/blog/categories/ccna24/index.html
index 6b10a3d93..20570a07f 100644
--- a/blog/categories/ccna24/index.html
+++ b/blog/categories/ccna24/index.html
@@ -66,7 +66,7 @@
<time class="listdate" datetime="2024-10-08T14:50:00-06:00" title="2024-10-08
14:50">2024-10-08 14:50</time><a
href="../../posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/"
class="listtitle">Intelligent Utilization Aware Autoscaling for Impala Virtual
Compute Clusters</a><a></a>
</li>
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impala's Living on Iceberg</a><a></a>
+<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impalas living on Iceberg</a><a></a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/categories/cat_posts/index.html
b/blog/categories/codegen/index.html
similarity index 92%
rename from blog/categories/cat_posts/index.html
rename to blog/categories/codegen/index.html
index b059947a5..522f9dcc8 100644
--- a/blog/categories/cat_posts/index.html
+++ b/blog/categories/codegen/index.html
@@ -5,12 +5,12 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Posts about posts | Apache Impala</title>
+<title>Posts about codegen | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical"
href="https://impala.apache.org/blog/categories/cat_posts/">
+<link rel="canonical"
href="https://impala.apache.org/blog/categories/codegen/">
</head>
<body id="index" class="home">
<div class="container">
@@ -53,14 +53,14 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="tagpage"><header><h1>Posts about
posts</h1>
+ <main id="content"><article class="tagpage"><header><h1>Posts about
codegen</h1>
<div class="metadata">
</div>
</header><ul class="postlist">
<li>
-<time class="listdate" datetime="2024-10-07T16:00:00-06:00" title="2024-10-07
16:00">2024-10-07 16:00</time><a href="../../posts/impalas-living-on-iceberg/"
class="listtitle">Impalas living on Iceberg</a><a></a>
+<time class="listdate" datetime="2024-10-29T14:00:00-07:00" title="2024-10-29
14:00">2024-10-29 14:00</time><a
href="../../posts/codegen-cache-for-low-latency-queries/"
class="listtitle">Codegen cache for low latency queries</a><a></a>
</li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/categories/index.html b/blog/categories/index.html
index a424d2bf6..f34103565 100644
--- a/blog/categories/index.html
+++ b/blog/categories/index.html
@@ -71,6 +71,7 @@
<ul class="postlist">
<li><a class="reference listtitle" href="cceu24/">cceu24</a></li>
<li><a class="reference listtitle"
href="ccna24/">ccna24</a></li>
+ <li><a class="reference listtitle"
href="codegen/">codegen</a></li>
</ul></article></main><footer id="footer"><p> </p>
<div class="navbar">
<div class="navbar-inner">
diff --git a/blog/images/codegen-cache-perf.png
b/blog/images/codegen-cache-perf.png
new file mode 100644
index 000000000..c065e3a7e
Binary files /dev/null and b/blog/images/codegen-cache-perf.png differ
diff --git a/blog/images/codegen-cache-perf.thumbnail.png
b/blog/images/codegen-cache-perf.thumbnail.png
new file mode 100644
index 000000000..350902774
Binary files /dev/null and b/blog/images/codegen-cache-perf.thumbnail.png differ
diff --git a/blog/images/query-exec.png b/blog/images/query-exec.png
new file mode 100644
index 000000000..84f8ef344
Binary files /dev/null and b/blog/images/query-exec.png differ
diff --git a/blog/images/query-exec.thumbnail.png
b/blog/images/query-exec.thumbnail.png
new file mode 100644
index 000000000..807fd23e2
Binary files /dev/null and b/blog/images/query-exec.thumbnail.png differ
diff --git a/blog/archive.html b/blog/index-1.html
similarity index 65%
copy from blog/archive.html
copy to blog/index-1.html
index 64e1c7cf4..543aae673 100644
--- a/blog/archive.html
+++ b/blog/index-1.html
@@ -6,12 +6,13 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
<meta name="description" content="Apache Impala is a modern, open source,
distributed SQL query engine for open data and table formats.">
-<title>Archive | Apache Impala</title>
+<title>Apache Impala (old posts, page 1) | Apache Impala</title>
<link href="assets/css/bootstrap.min.css" rel="stylesheet" type="text/css">
<link href="assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
-<link rel="canonical" href="https://impala.apache.org/blog/archive.html">
+<link rel="canonical" href="https://impala.apache.org/blog/index-1.html">
+<link rel="prev" href="." type="text/html">
</head>
<body id="index" class="home">
<div class="container">
@@ -54,25 +55,44 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="listpage"><header><h1>Archive</h1>
- </header><ul class="postlist">
-<li>
-<a href="2024/">2024</a>
- (6)
- </li>
-<li>
-<a href="2017/">2017</a>
- (1)
- </li>
-<li>
-<a href="2016/">2016</a>
- (1)
- </li>
-<li>
-<a href="2015/">2015</a>
- (2)
- </li>
-</ul></article></main><footer id="footer"><p> </p>
+ <main id="content"><h3>All articles</h3>
+<div class="postindex">
+ <hr>
+<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/impala-a-modern-open-source-sql-engine-for-hadoop/"
class="u-url">Impala: A Modern, Open-Source SQL Engine for Hadoop</a></h3>
+ <div class="metadata">
+ <p class="byline author vcard"><span class="byline-name fn">
+ <a href="authors/impala-dev/">Impala Dev</a>
+ </span></p>
+ <p class="dateline"><a
href="posts/impala-a-modern-open-source-sql-engine-for-hadoop/"
rel="bookmark"><time class="published dt-published"
datetime="2015-01-05T23:00:00-07:00" title="2015-01-05 23:00">2015-01-05
23:00</time></a></p>
+ </div>
+ </header><div class="p-summary entry-summary">
+ <p>
+ <i>Presented at
+ <a href="https://www.cidrdb.org/cidr2015/program.html" target="_blank">The
Conference on
+ Innovative Data Systems Research (CIDR) 2015</a>.
+ </i>
+</p>
+
+<h4>ABSTRACT</h4>
+<p>Cloudera Impala is a modern, open-source MPP SQL engine architected from
the ground up for the
+Hadoop data processing environment. Impala provides low latency and high
concurrency for BI/analytic
+read-mostly queries on Hadoop, not delivered by batch frameworks such as
Apache Hive. This paper
+presents Impala from a user’s perspective, gives an overview of its
architecture and main components
+and briefly demonstrates its superior performance compared against other
popular SQL-on-Hadoop
+systems.</p>
+
+<p>
+ <a href="https://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper28.pdf"
target="_blank">Paper</a> |
+ <a
href="https://www.cidrdb.org/cidr2015/Slides/28_CIDR15_Slides_Paper28.pdf"
target="_blank">Slides</a>
+</p>
+ </div>
+ </article>
+</div>
+ <nav class="postindexpager"><ul class="pager">
+<li class="previous">
+ <a href="." rel="prev">Newer posts</a>
+ </li>
+ </ul></nav></main><footer id="footer"><p> </p>
<div class="navbar">
<div class="navbar-inner">
<div class="container">
diff --git a/blog/index.html b/blog/index.html
index 81360d5d2..76fc6a82f 100644
--- a/blog/index.html
+++ b/blog/index.html
@@ -12,7 +12,8 @@
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
<link rel="canonical" href="https://impala.apache.org/blog/">
-<link rel="prefetch" href="posts/healing-iceberg-tables-with-impala/"
type="text/html">
+<link rel="next" href="index-1.html" type="text/html">
+<link rel="prefetch" href="posts/codegen-cache-for-low-latency-queries/"
type="text/html">
</head>
<body id="index" class="home">
<div class="container">
@@ -58,6 +59,125 @@
<main id="content"><h3>All articles</h3>
<div class="postindex">
<hr>
+<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/codegen-cache-for-low-latency-queries/" class="u-url">Codegen cache
for low latency queries</a></h3>
+ <div class="metadata">
+ <p class="byline author vcard"><span class="byline-name fn">
+ <a href="authors/michael-smith/">Michael Smith</a>
+ <a href="authors/yida-wu/">Yida Wu</a>
+ <a href="authors/david-rorke/">David Rorke</a>
+ <a href="authors/abhishek-rawat/">Abhishek Rawat</a>
+ </span></p>
+ <p class="dateline"><a
href="posts/codegen-cache-for-low-latency-queries/" rel="bookmark"><time
class="published dt-published" datetime="2024-10-29T14:00:00-07:00"
title="2024-10-29 14:00">2024-10-29 14:00</time></a></p>
+ </div>
+ </header><div class="p-summary entry-summary">
+ <p>Apache Impala is a high-performance engine - written primarily in C++ -
for executing low-latency SQL queries. At a high level, Impala generates a
distributed query plan (first two phases in yellow), admits the query once
sufficient capacity is available, and finally executes the query. For a more
in-depth description of these phases please refer to <a
href="https://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper28.pdf">Impala: A
Modern, Open-Source SQL Engine for Hadoop</a>.</p>
+<p><img alt="Query Execution" src="images/query-exec.png" style="display:
block; margin: 0 auto;"></p>
+<p>During Distributed Execution, each fragment of the query plan is run on one
or more Impala executors, with a degree of parallelism determined by the
planner. A fragment is a distinct block of work that can be executed on a
single node, and often comprises steps such as scanning and filtering rows from
files (or other data sources), hashing that data to group or order it, and
sending it to other executors via an exchange for distributed aggregation.</p>
+<h4>Code Generation</h4>
+<p>The steps taken within each fragment comprise the bulk of the work an
executor does, and databases use different techniques to optimize that work.
The actual operations needed will depend on the types of the specific columns
being manipulated, which may be simple scalar types or complex data such as
structs and arrays. At the beginning of executing each fragment, Impala
leverages the <a href="https://llvm.org">LLVM project</a> to generate machine
code specific to the steps and columns [...]
+<p>Code generation can dramatically speed up the operations done on each row,
but has an initial overhead in generating the code that offsets that benefit.
This initial overhead of generating code becomes relevant to sub-second and
low-second queries because a codegen time of, say, 100-250 ms is significant if the query
only takes 2 seconds to finish. Typical examples of such queries are queries on
Kudu tables that finish in seconds. Historically we recommended users to either
<code>set DISABLE [...]
+<p><code>DISABLE_CODEGEN_ROWS_THRESHOLD</code> currently estimates the number
of rows being processed on each of the nodes and then decides whether codegen
should be disabled. There are scenarios where the planner estimate is incorrect
or the query is complex and codegen would have actually helped.</p>
+<p>To help mitigate the cost of codegen for short-running queries that are run
repeatedly, we've introduced a new codegen caching feature. With codegen cache
enabled, code generation for queries will be cached, and subsequent runs will
be faster by not needing to regenerate that code.</p>
+<p>Using Cloudera Data Warehouse 1.9.2 with Runtime 2024.0.18.0-206 on AWS EC2
r5d.4xlarge instances, we performed a TPC-DS 1 TB benchmark with 10 executors
to evaluate codegen cache performance. Across the whole test suite we saw
geometric mean times improve by 4.8%. Since we expect codegen cache to help
more with faster queries, we isolate the queries that executed in less than
2s:</p>
+<p><img alt="Codegen cache performance" src="images/codegen-cache-perf.png"
style="display: block; margin: 0 auto;"></p>
+<p>For these queries, we see a geometric mean improvement of 22%,
significantly improving the performance of low latency queries by eliminating
most of the code generation time.</p>
+<h4>The Codegen Cache</h4>
+<p><a
href="https://docs.cloudera.com/cdw-runtime/cloud/impala-reference/topics/impala-codegencaching.html">Caching
Codegen Functions</a> has been added to reduce the cost of code generation
when repeating queries or running substantially similar queries by caching the
results of code generation. The codegen cache in Impala works at the fragment
level, meaning that it caches and reuses the machine code for specific
fragments of a query.</p>
+<p>When Impala generates code using LLVM and the codegen cache is enabled, it
will store the generated objects using <a
href="https://blog.llvm.org/2013/08/object-caching-with-kaleidoscope.html">LLVM’s
Object Caching</a>. Impala goes through several steps during codegen:</p>
+<ol>
+<li>Load pre-parsed and partially optimized Impala library functions so that
new code generation can reference them.</li>
+<li>Define functions representing the operations to be performed using LLVM’s
intermediate representation (IR).</li>
+<li>Prune unused library functions loaded in step (1).</li>
+<li>Run LLVM’s builtin passes to optimize the IR generated through steps
1-3.</li>
+<li>Generate machine code from the optimized IR.</li>
+</ol>
+<p>The most time-consuming of these steps are the optimization passes and
generating machine code. When using the codegen cache, Impala performs steps
1-3, then constructs a key based on a serialization of the IR. It then looks
for a match for the key in the codegen cache; if found, the result will be a
machine code object that’s ready for immediate use; otherwise steps 4 and 5 are
performed to generate machine code, which will then be stored to the codegen
cache and used.</p>
+<p>The codegen cache stores all objects in-memory. Its capacity is determined
by <code>CODEGEN_CACHE_CAPACITY</code>. When the cache is full, it evicts the
Least-Recently Used (LRU) object to make space for new entries.</p>
+<h5>Example of Caching Codegen Functions</h5>
+<p>Consider the following table:</p>
+<div class="code"><pre class="code literal-block">create table sales_data
(product_id int, category string, sales double);
+</pre></div>
+
+<p>We run two similar queries sequentially:</p>
+<ol>
+<li><code>select category, sum(sales) from sales_data where category = 'a'
group by category;</code></li>
+<li><code>select category, sum(sales) from sales_data where category = 'b'
group by category;</code></li>
+</ol>
+<p>After running Query 1, the query profile shows the plan as follows, with
zero cached functions and a total codegen compilation time of several dozen
milliseconds for each fragment.</p>
+<div class="code"><pre class="code literal-block"><span
class="nl">F02</span><span class="p">:</span><span class="k">PLAN</span><span
class="w"> </span><span class="n">FRAGMENT</span><span class="w"> </span><span
class="o">[</span><span class="n">UNPARTITIONED</span><span
class="o">]</span><span class="w"> </span><span class="n">hosts</span><span
class="o">=</span><span class="mi">1</span><span class="w"> </span><span
class="n">instances</span><span class="o">=</span><span class="mi">1</span>
+<span class="p">...</span>
+<span class="mi">04</span><span class="err">:</span><span
class="n">EXCHANGE</span><span class="w"> </span><span class="o">[</span><span
class="n">UNPARTITIONED</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="nl">F01</span><span class="p">:</span><span
class="k">PLAN</span><span class="w"> </span><span
class="n">FRAGMENT</span><span class="w"> </span><span class="o">[</span><span
class="n">HASH(category)</span><span class="o">]</span><span class="w">
</span><span class="n">hosts</span><span class="o">=</span><span
class="mi">1</span><span class="w"> </span><span
class="n">instances</span><span class="o">=</span><span class="mi">1</span>
+<span class="mi">03</span><span class="err">:</span><span
class="k">AGGREGATE</span><span class="w"> </span><span class="o">[</span><span
class="n">FINALIZE</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="mi">02</span><span class="err">:</span><span
class="n">EXCHANGE</span><span class="w"> </span><span class="o">[</span><span
class="n">HASH(category)</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="nl">F00</span><span class="p">:</span><span
class="k">PLAN</span><span class="w"> </span><span
class="n">FRAGMENT</span><span class="w"> </span><span class="o">[</span><span
class="n">RANDOM</span><span class="o">]</span><span class="w"> </span><span
class="n">hosts</span><span class="o">=</span><span class="mi">1</span><span
class="w"> </span><span class="n">instances</span><span class="o">=</span><span
class="mi">1</span>
+<span class="mi">01</span><span class="err">:</span><span
class="k">AGGREGATE</span><span class="w"> </span><span class="o">[</span><span
class="n">STREAMING</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="mi">00</span><span class="err">:</span><span
class="n">SCAN</span><span class="w"> </span><span class="n">HDFS</span><span
class="w"> </span><span class="o">[</span><span class="n">default.sales_data,
RANDOM</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F02</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">2</span><span
class="w"> </span><span class="p">(</span><span class="mi">2</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">52.000</span><span class="n">ms</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F01</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">20</span><span
class="w"> </span><span class="p">(</span><span class="mi">20</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">100.000</span><span class="n">ms</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F00</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">20</span><span
class="w"> </span><span class="p">(</span><span class="mi">20</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">116.000</span><span class="n">ms</span>
+</pre></div>
+
+<p>After running Query 2, the functions of fragments F02 and F01 are
successfully loaded from the codegen cache, because these fragments are
identical in both queries, greatly reducing the total codegen compilation
time. However, Fragment F00 does not hit the codegen cache because different
predicates are used in the two queries: in our case, <code>category =
'a'</code> vs. <code>category = 'b'</code>. As a result, the codegen functions
in the corresponding scan nodes are treated a [...]
+<div class="code"><pre class="code literal-block"> Fragment F02:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 2 (2)
+ ...
+ - NumOptimizedFunctions: 2 (2)
+ ...
+ - TotalTime: 32.000ms
+ Fragment F01:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 20 (20)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 40.000ms
+ Fragment F00:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 112.000ms
+</pre></div>
+
+<p>Note that native UDFs are not supported by the codegen cache: if a fragment
contains any native UDF, the codegen of that fragment won't be cached.</p>
+<h4>Summary</h4>
+<p>The codegen cache has been supported and enabled by default since Impala 4.3. By
setting the flag file option <code>CODEGEN_CACHE_CAPACITY</code>, you can
change the amount of memory used for the codegen cache from its default value.</p>
+<p>Interested in contributing? We have future work planned here for codegen
caching - <a
href="https://issues.apache.org/jira/browse/IMPALA-13187">IMPALA-13187</a></p>
+<p><em>Reblogged with edits from <a
href="https://medium.com/engineering-cloudera/codegen-cache-for-low-latency-queries-47d5fd947fcf">Engineering@Cloudera
on Medium</a></em></p>
+ </div>
+ </article><hr>
<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/healing-iceberg-tables-with-impala/" class="u-url">Healing Iceberg
Tables with Impala</a></h3>
<div class="metadata">
<p class="byline author vcard"><span class="byline-name fn">
@@ -100,7 +220,7 @@ admins/users to manage Virtual Clusters.</p>
<p><em>Appeared in <a
href="https://communityovercode.org/schedule/#sz-tab-45573">Community Over Code
NA 2024</a></em></p>
</div>
</article><hr>
-<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/impalas-living-on-iceberg/" class="u-url">Impala's Living on
Iceberg</a></h3>
+<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/impalas-living-on-iceberg/" class="u-url">Impalas living on
Iceberg</a></h3>
<div class="metadata">
<p class="byline author vcard"><span class="byline-name fn">
<a href="authors/gabor-kaszab/">Gabor Kaszab</a>
@@ -285,41 +405,14 @@ handle nested data types.</p>
</div>
<p><i>Presented in Impala Meetup, PA, March 24th, 2015</i></p>
- </div>
- </article><hr>
-<article class="h-entry post-text"><header><h3 class="p-name entry-title"><a
href="posts/impala-a-modern-open-source-sql-engine-for-hadoop/"
class="u-url">Impala: A Modern, Open-Source SQL Engine for Hadoop</a></h3>
- <div class="metadata">
- <p class="byline author vcard"><span class="byline-name fn">
- <a href="authors/impala-dev/">Impala Dev</a>
- </span></p>
- <p class="dateline"><a
href="posts/impala-a-modern-open-source-sql-engine-for-hadoop/"
rel="bookmark"><time class="published dt-published"
datetime="2015-01-05T23:00:00-07:00" title="2015-01-05 23:00">2015-01-05
23:00</time></a></p>
- </div>
- </header><div class="p-summary entry-summary">
- <p>
- <i>Presented at
- <a href="https://www.cidrdb.org/cidr2015/program.html" target="_blank">The
Conference on
- Innovative Data Systems Research (CIDR) 2015</a>.
- </i>
-</p>
-
-<h4>ABSTRACT</h4>
-<p>Cloudera Impala is a modern, open-source MPP SQL engine architected from
the ground up for the
-Hadoop data processing environment. Impala provides low latency and high
concurrency for BI/analytic
-read-mostly queries on Hadoop, not delivered by batch frameworks such as
Apache Hive. This paper
-presents Impala from a user’s perspective, gives an overview of its
architecture and main components
-and briefly demonstrates its superior performance compared against other
popular SQL-on-Hadoop
-systems.</p>
-
-<p>
- <a href="https://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper28.pdf"
target="_blank">Paper</a> |
- <a
href="https://www.cidrdb.org/cidr2015/Slides/28_CIDR15_Slides_Paper28.pdf"
target="_blank">Slides</a>
-</p>
</div>
</article>
</div>
-
-
- </main><footer id="footer"><p> </p>
+ <nav class="postindexpager"><ul class="pager">
+<li class="next">
+ <a href="index-1.html" rel="next">Older posts</a>
+ </li>
+ </ul></nav></main><footer id="footer"><p> </p>
<div class="navbar">
<div class="navbar-inner">
<div class="container">
diff --git a/blog/posts/codegen-cache-for-low-latency-queries/index.html
b/blog/posts/codegen-cache-for-low-latency-queries/index.html
new file mode 100644
index 000000000..5c023ddcc
--- /dev/null
+++ b/blog/posts/codegen-cache-for-low-latency-queries/index.html
@@ -0,0 +1,225 @@
+<!DOCTYPE html>
+<html prefix="
+ " lang="en">
+<head>
+<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<meta charset="utf-8">
+<meta name="description" content="Making fast queries faster in Impala">
+<title>Codegen cache for low latency queries | Apache Impala</title>
+<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
+<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
+<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
+<script
src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script><script
src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script><meta
content="#5670d4" name="theme-color">
+<link rel="canonical"
href="https://impala.apache.org/blog/posts/codegen-cache-for-low-latency-queries/">
+<meta name="description" content="Making fast queries faster in Impala">
+<meta name="author" content="Michael Smith, Yida Wu, David Rorke, Abhishek
Rawat">
+<link rel="prev" href="../healing-iceberg-tables-with-impala/" title="Healing
Iceberg Tables with Impala" type="text/html">
+</head>
+<body id="index" class="home">
+ <div class="container">
+ <div class="masthead">
+ <header id="header"><h3 id="brand">
+
+ <span id="blog-title" class="muted">Apache Impala</span>
+ </h3>
+
+ <nav id="menu"><div class="navbar">
+ <div class="navbar-inner">
+ <div class="container">
+
+ <ul class="nav">
+<li><a href="https://impala.apache.org/index.html">Home</a></li>
+ <li><a
href="https://impala.apache.org/downloads.html">Downloads</a></li>
+ <li><a
href="https://impala.apache.org/overview.html">Overview</a></li>
+ <li class="active"><a
href="https://impala.apache.org/blog/">Blog</a></li>
+ <li><a
href="https://cwiki.apache.org/confluence/display/IMPALA/Contributing+to+Impala">Contribute</a></li>
+ <li class="dropdown">
+ <a href="#" class="dropdown-toggle" data-toggle="dropdown"
role="button">
+ Source code</a>
+ <ul class="dropdown-menu">
+<li>
+ <a
href="https://git-wip-us.apache.org/repos/asf/incubator-impala.git">
+ Official source</a>
+ </li>
+ <li><a
href="https://github.com/apache/incubator-impala">GitHub mirror</a></li>
+ </ul>
+</li>
+ <li><a
href="https://impala.apache.org/community.html">Community</a></li>
+ <li><a
href="https://impala.apache.org/impala-docs.html">Documentation</a></li>
+ </ul>
+</div>
+<!-- container -->
+ </div>
+<!-- navbar-inner -->
+ </div>
+<!-- navbar -->
+ </nav></header>
+</div>
+<!-- masthead -->
+ <main id="content"><article class="post-text h-entry hentry postpage"
itemscope="itemscope" itemtype="http://schema.org/Article"><header><h3
class="p-name entry-title" itemprop="headline name"><a href="."
class="u-url">Codegen cache for low latency queries</a></h3>
+
+ <div class="metadata">
+ <p class="byline author vcard">
+ <span class="byline-name fn">
+ <a href="../../authors/michael-smith/">Michael
Smith</a>
+ <a href="../../authors/yida-wu/">Yida Wu</a>
+ <a href="../../authors/david-rorke/">David Rorke</a>
+ <a href="../../authors/abhishek-rawat/">Abhishek
Rawat</a>
+ </span>
+ </p>
+ <p class="dateline"><a href="." rel="bookmark"><time
class="published dt-published" datetime="2024-10-29T14:00:00-07:00"
itemprop="datePublished" title="2024-10-29 14:00">2024-10-29
14:00</time></a></p>
+ <meta name="description" itemprop="description"
content="Making fast queries faster in Impala">
+</div>
+ </header><div class="e-content entry-content" itemprop="articleBody text">
+ <p>Apache Impala is a high-performance engine - written primarily in C++ -
for executing low-latency SQL queries. At a high level, Impala generates a
distributed query plan (first two phases in yellow), admits the query once
sufficient capacity is available, and finally executes the query. For a more
in-depth description of these phases please refer to <a
href="https://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper28.pdf">Impala: A
Modern, Open-Source SQL Engine for Hadoop</a>.</p>
+<p><img alt="Query Execution" src="../../images/query-exec.png"
style="display: block; margin: 0 auto;"></p>
+<p>During Distributed Execution, each fragment of the query plan is run on one
or more Impala executors, with a degree of parallelism determined by the
planner. A fragment is a distinct block of work that can be executed on a
single node, and often comprises steps such as scanning and filtering rows from
files (or other data sources), hashing that data to group or order it, and
sending it to other executors via an exchange for distributed aggregation.</p>
+<h4>Code Generation</h4>
+<p>The steps taken within each fragment comprise the bulk of the work an
executor does, and databases use different techniques to optimize that work.
The actual operations needed will depend on the types of the specific columns
being manipulated, which may be simple scalar types or complex data such as
structs and arrays. At the beginning of executing each fragment, Impala
leverages the <a href="https://llvm.org">LLVM project</a> to generate machine
code specific to the steps and columns [...]
+<p>Code generation can dramatically speed up the operations done on each row,
but has an initial overhead in generating the code that offsets that benefit.
This initial overhead of generating code becomes relevant to sub-second and
low-second queries because a codegen time of, say, 100-250 ms is significant if the query
only takes 2 seconds to finish. Typical examples of such queries are queries on
Kudu tables that finish in seconds. Historically we recommended users to either
<code>set DISABLE [...]
+<p><code>DISABLE_CODEGEN_ROWS_THRESHOLD</code> currently estimates the number
of rows being processed on each of the nodes and then decides whether codegen
should be disabled. There are scenarios where the planner estimate is incorrect
or the query is complex and codegen would have actually helped.</p>
+<p>To help mitigate the cost of codegen for short-running queries that are run
repeatedly, we've introduced a new codegen caching feature. With codegen cache
enabled, code generation for queries will be cached, and subsequent runs will
be faster by not needing to regenerate that code.</p>
+<p>Using Cloudera Data Warehouse 1.9.2 with Runtime 2024.0.18.0-206 on AWS EC2
r5d.4xlarge instances, we performed a TPC-DS 1 TB benchmark with 10 executors
to evaluate codegen cache performance. Across the whole test suite we saw
geometric mean times improve by 4.8%. Since we expect codegen cache to help
more with faster queries, we isolate the queries that executed in less than
2s:</p>
+<p><img alt="Codegen cache performance"
src="../../images/codegen-cache-perf.png" style="display: block; margin: 0
auto;"></p>
+<p>For these queries, we see a geometric mean improvement of 22%,
significantly improving the performance of low latency queries by eliminating
most of the code generation time.</p>
+<h4>The Codegen Cache</h4>
+<p><a
href="https://docs.cloudera.com/cdw-runtime/cloud/impala-reference/topics/impala-codegencaching.html">Caching
Codegen Functions</a> has been added to reduce the cost of code generation
when repeating queries or running substantially similar queries by caching the
results of code generation. The codegen cache in Impala works at the fragment
level, meaning that it caches and reuses the machine code for specific
fragments of a query.</p>
+<p>When Impala generates code using LLVM and the codegen cache is enabled, it
will store the generated objects using <a
href="https://blog.llvm.org/2013/08/object-caching-with-kaleidoscope.html">LLVM’s
Object Caching</a>. Impala goes through several steps during codegen:</p>
+<ol>
+<li>Load pre-parsed and partially optimized Impala library functions so that
new code generation can reference them.</li>
+<li>Define functions representing the operations to be performed using LLVM’s
intermediate representation (IR).</li>
+<li>Prune unused library functions loaded in step (1).</li>
+<li>Run LLVM’s builtin passes to optimize the IR generated through steps
1-3.</li>
+<li>Generate machine code from the optimized IR.</li>
+</ol>
+<p>The most time-consuming of these steps are the optimization passes and
generating machine code. When using the codegen cache, Impala performs steps
1-3, then constructs a key based on a serialization of the IR. It then looks
for a match for the key in the codegen cache; if found, the result will be a
machine code object that’s ready for immediate use; otherwise steps 4 and 5 are
performed to generate machine code, which will then be stored to the codegen
cache and used.</p>
+<p>The codegen cache stores all objects in-memory. Its capacity is determined
by <code>CODEGEN_CACHE_CAPACITY</code>. When the cache is full, it evicts the
Least-Recently Used (LRU) object to make space for new entries.</p>
+<h5>Example of Caching Codegen Functions</h5>
+<p>Consider the following table:</p>
+<div class="code"><pre class="code literal-block">create table sales_data
(product_id int, category string, sales double);
+</pre></div>
+
+<p>We run two similar queries sequentially:</p>
+<ol>
+<li><code>select category, sum(sales) from sales_data where category = 'a'
group by category;</code></li>
+<li><code>select category, sum(sales) from sales_data where category = 'b'
group by category;</code></li>
+</ol>
+<p>After running Query 1, the query profile shows the plan as follows, with
zero cached functions and a total codegen compilation time of several dozen
milliseconds for each fragment.</p>
+<div class="code"><pre class="code literal-block"><span
class="nl">F02</span><span class="p">:</span><span class="k">PLAN</span><span
class="w"> </span><span class="n">FRAGMENT</span><span class="w"> </span><span
class="o">[</span><span class="n">UNPARTITIONED</span><span
class="o">]</span><span class="w"> </span><span class="n">hosts</span><span
class="o">=</span><span class="mi">1</span><span class="w"> </span><span
class="n">instances</span><span class="o">=</span><span class="mi">1</span>
+<span class="p">...</span>
+<span class="mi">04</span><span class="err">:</span><span
class="n">EXCHANGE</span><span class="w"> </span><span class="o">[</span><span
class="n">UNPARTITIONED</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="nl">F01</span><span class="p">:</span><span
class="k">PLAN</span><span class="w"> </span><span
class="n">FRAGMENT</span><span class="w"> </span><span class="o">[</span><span
class="n">HASH(category)</span><span class="o">]</span><span class="w">
</span><span class="n">hosts</span><span class="o">=</span><span
class="mi">1</span><span class="w"> </span><span
class="n">instances</span><span class="o">=</span><span class="mi">1</span>
+<span class="mi">03</span><span class="err">:</span><span
class="k">AGGREGATE</span><span class="w"> </span><span class="o">[</span><span
class="n">FINALIZE</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="mi">02</span><span class="err">:</span><span
class="n">EXCHANGE</span><span class="w"> </span><span class="o">[</span><span
class="n">HASH(category)</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="nl">F00</span><span class="p">:</span><span
class="k">PLAN</span><span class="w"> </span><span
class="n">FRAGMENT</span><span class="w"> </span><span class="o">[</span><span
class="n">RANDOM</span><span class="o">]</span><span class="w"> </span><span
class="n">hosts</span><span class="o">=</span><span class="mi">1</span><span
class="w"> </span><span class="n">instances</span><span class="o">=</span><span
class="mi">1</span>
+<span class="mi">01</span><span class="err">:</span><span
class="k">AGGREGATE</span><span class="w"> </span><span class="o">[</span><span
class="n">STREAMING</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="mi">00</span><span class="err">:</span><span
class="n">SCAN</span><span class="w"> </span><span class="n">HDFS</span><span
class="w"> </span><span class="o">[</span><span class="n">default.sales_data,
RANDOM</span><span class="o">]</span>
+<span class="p">...</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F02</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">2</span><span
class="w"> </span><span class="p">(</span><span class="mi">2</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">52.000</span><span class="n">ms</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F01</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">20</span><span
class="w"> </span><span class="p">(</span><span class="mi">20</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">100.000</span><span class="n">ms</span>
+<span class="w"> </span><span class="n">Fragment</span><span class="w">
</span><span class="nl">F00</span><span class="p">:</span>
+<span class="w"> </span><span class="nl">CodeGen</span><span
class="p">:</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumCachedFunctions</span><span class="p">:</span><span
class="w"> </span><span class="mi">0</span><span class="w"> </span><span
class="p">(</span><span class="mi">0</span><span class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">NumOptimizedFunctions</span><span
class="p">:</span><span class="w"> </span><span class="mi">20</span><span
class="w"> </span><span class="p">(</span><span class="mi">20</span><span
class="p">)</span>
+<span class="w"> </span><span class="p">...</span>
+<span class="w"> </span><span class="o">-</span><span class="w">
</span><span class="nl">TotalTime</span><span class="p">:</span><span
class="w"> </span><span class="mf">116.000</span><span class="n">ms</span>
+</pre></div>
+
+<p>After running Query 2, the functions of fragments F02 and F01 are
successfully loaded from the codegen cache, because these fragments are
identical in both queries, largely reducing the total codegen compilation
time. However, Fragment F00 does not hit the codegen cache because different
predicates are used in the two queries, like in our case, <code>category =
'a'</code> vs. <code>category = 'b'</code>. As a result, the codegen functions
in the corresponding scan nodes are treated a [...]
+<div class="code"><pre class="code literal-block"> Fragment F02:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 2 (2)
+ ...
+ - NumOptimizedFunctions: 2 (2)
+ ...
+ - TotalTime: 32.000ms
+ Fragment F01:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 20 (20)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 40.000ms
+ Fragment F00:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 112.000ms
+</pre></div>
+
+<p>Note that native UDFs are not supported by the codegen cache: if a fragment
contains any native UDF, the codegen of that fragment won't be cached.</p>
+<h4>Summary</h4>
+<p>The codegen cache has been supported and enabled by default since Impala
4.3. By setting the flag file option <code>CODEGEN_CACHE_CAPACITY</code>, you
can adjust the amount of memory used for the codegen cache.</p>
+<p>Interested in contributing? We have future work planned here for codegen
caching - <a
href="https://issues.apache.org/jira/browse/IMPALA-13187">IMPALA-13187</a></p>
+<p><em>Reblogged with edits from <a
href="https://medium.com/engineering-cloudera/codegen-cache-for-low-latency-queries-47d5fd947fcf">Engineering@Cloudera
on Medium</a></em></p>
+ </div>
+ <aside class="postpromonav"><nav><ul itemprop="keywords" class="tags">
+<li><a class="tag p-category" href="../../categories/codegen/"
rel="tag">codegen</a></li>
+ </ul>
+<ul class="pager hidden-print">
+<li class="previous">
+ <a href="../healing-iceberg-tables-with-impala/" rel="prev"
title="Healing Iceberg Tables with Impala">Previous post</a>
+ </li>
+ </ul></nav></aside></article></main><footer id="footer"><p> </p>
+ <div class="navbar">
+ <div class="navbar-inner">
+ <div class="container">
+
+ <ul class="nav">
+<li><a href="https://www.apache.org/licenses/">License</a></li>
+ <li><a
href="https://www.apache.org/foundation/sponsorship.html">Sponsorship</a></li>
+ <li><a
href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
+ <li><a href="https://www.apache.org/security/">Security</a></li>
+ <li><a href="https://www.apache.org/">Apache Software
Foundation</a></li>
+ </ul>
+</div>
+<!-- container -->
+ </div>
+<!-- navbar-inner -->
+ </div>
+<!-- navbar -->
+
+ <div class="footer">
+ <center>
+ <a href="https://www.apache.org/events/current-event.html">
+ <img
src="https://www.apache.org/events/current-event-234x60.png"></a>
+ </center>
+ <p>Apache Impala, Impala, Apache, the Apache feather logo, and the Apache
+ Impala project logo are either registered trademarks or trademarks of
The
+ Apache Software Foundation in the United States and other countries.
+ </p>
+ </div>
+<!-- footer -->
+
+ <p>Contents © 2016-2024 <a
href="mailto:[email protected]">Impala Dev</a> - Powered by <a
href="https://getnikola.com" rel="nofollow">Nikola</a> </p>
+ </footer>
+</div>
+<!-- container -->
+</body>
+</html>
diff --git a/blog/posts/healing-iceberg-tables-with-impala/index.html
b/blog/posts/healing-iceberg-tables-with-impala/index.html
index bfb3162a4..686918081 100644
--- a/blog/posts/healing-iceberg-tables-with-impala/index.html
+++ b/blog/posts/healing-iceberg-tables-with-impala/index.html
@@ -13,6 +13,7 @@
<link rel="canonical"
href="https://impala.apache.org/blog/posts/healing-iceberg-tables-with-impala/">
<meta name="author" content="Noémi Pap-Takács">
<link rel="prev"
href="../intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/"
title="Intelligent Utilization Aware Autoscaling for Impala Virtual Compute
Clusters" type="text/html">
+<link rel="next" href="../codegen-cache-for-low-latency-queries/"
title="Codegen cache for low latency queries" type="text/html">
</head>
<body id="index" class="home">
<div class="container">
@@ -85,6 +86,9 @@ DROP PARTITION statement allows selective partition removal
based on predicates.
<li class="previous">
<a
href="../intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/"
rel="prev" title="Intelligent Utilization Aware Autoscaling for Impala Virtual
Compute Clusters">Previous post</a>
</li>
+ <li class="next">
+ <a href="../codegen-cache-for-low-latency-queries/" rel="next"
title="Codegen cache for low latency queries">Next post</a>
+ </li>
</ul></nav></aside></article></main><footer id="footer"><p> </p>
<div class="navbar">
<div class="navbar-inner">
diff --git a/blog/posts/impalas-living-on-iceberg/index.html
b/blog/posts/impalas-living-on-iceberg/index.html
index dc5cae183..10c89aa36 100644
--- a/blog/posts/impalas-living-on-iceberg/index.html
+++ b/blog/posts/impalas-living-on-iceberg/index.html
@@ -5,7 +5,7 @@
<meta name="keywords" content="hadoop, impala, sql, mpp, bi, big data, open
source">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta charset="utf-8">
-<title>Impala's Living on Iceberg | Apache Impala</title>
+<title>Impalas living on Iceberg | Apache Impala</title>
<link href="../../assets/css/bootstrap.min.css" rel="stylesheet"
type="text/css">
<link href="../../assets/css/bootstrap-responsive.min.css" rel="stylesheet"
type="text/css">
<!-- order is significant to prevent overwriting of some bootstrap-defined css
styles --><link href="../../assets/css/additional_styles.css" rel="stylesheet"
type="text/css">
@@ -56,7 +56,7 @@
</nav></header>
</div>
<!-- masthead -->
- <main id="content"><article class="post-text h-entry hentry postpage"
itemscope="itemscope" itemtype="http://schema.org/Article"><header><h3
class="p-name entry-title" itemprop="headline name"><a href="."
class="u-url">Impala's Living on Iceberg</a></h3>
+ <main id="content"><article class="post-text h-entry hentry postpage"
itemscope="itemscope" itemtype="http://schema.org/Article"><header><h3
class="p-name entry-title" itemprop="headline name"><a href="."
class="u-url">Impalas living on Iceberg</a></h3>
<div class="metadata">
<p class="byline author vcard">
diff --git
a/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/index.html
b/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/index.html
index d8313717c..f5e109dce 100644
---
a/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/index.html
+++
b/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/index.html
@@ -13,7 +13,7 @@
<link rel="canonical"
href="https://impala.apache.org/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/">
<meta name="author" content="Riza Suminto">
<link rel="prev" href="../impalas-living-on-iceberg/" title="Impalas living on
Iceberg" type="text/html">
-<link rel="next" href="../healing-iceberg-tables-with-impala/" title="Healing
Iceberg tables with Impala" type="text/html">
+<link rel="next" href="../healing-iceberg-tables-with-impala/" title="Healing
Iceberg Tables with Impala" type="text/html">
</head>
<body id="index" class="home">
<div class="container">
@@ -87,7 +87,7 @@ admins/users to manage Virtual Clusters.</p>
<a href="../impalas-living-on-iceberg/" rel="prev"
title="Impalas living on Iceberg">Previous post</a>
</li>
<li class="next">
- <a href="../healing-iceberg-tables-with-impala/" rel="next"
title="Healing Iceberg tables with Impala">Next post</a>
+ <a href="../healing-iceberg-tables-with-impala/" rel="next"
title="Healing Iceberg Tables with Impala">Next post</a>
</li>
</ul></nav></aside></article></main><footer id="footer"><p> </p>
<div class="navbar">
diff --git a/blog/sitemap.xml b/blog/sitemap.xml
index 8e33e39f0..180964a10 100644
--- a/blog/sitemap.xml
+++ b/blog/sitemap.xml
@@ -7,110 +7,134 @@
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<url>
<loc>https://impala.apache.org/blog/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/2015/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/2016/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/2017/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/2024/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/archive.html</loc>
- <lastmod>2024-10-15T01:21:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/authors/</loc>
- <lastmod>2024-10-15T17:32:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
+ </url>
+ <url>
+ <loc>https://impala.apache.org/blog/authors/abhishek-rawat/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
+ </url>
+ <url>
+ <loc>https://impala.apache.org/blog/authors/david-rorke/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/authors/gabor-kaszab/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:32:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/authors/impala-dev/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
+ </url>
+ <url>
+ <loc>https://impala.apache.org/blog/authors/michael-smith/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/authors/noemi-pap-takacs/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/authors/riza-suminto/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
- <loc>https://impala.apache.org/blog/categories/</loc>
- <lastmod>2024-10-17T19:04:00Z</lastmod>
+ <loc>https://impala.apache.org/blog/authors/yida-wu/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
</url>
<url>
- <loc>https://impala.apache.org/blog/categories/cat_blogs/</loc>
- <lastmod>2024-10-17T19:04:00Z</lastmod>
+ <loc>https://impala.apache.org/blog/categories/</loc>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
- <loc>https://impala.apache.org/blog/categories/cat_posts/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <loc>https://impala.apache.org/blog/categories/cat_blogs/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/categories/cat_talks/</loc>
- <lastmod>2024-10-17T19:04:00Z</lastmod>
+ <lastmod>2024-10-30T17:32:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/categories/cceu24/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/categories/ccna24/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:32:00Z</lastmod>
+ </url>
+ <url>
+ <loc>https://impala.apache.org/blog/categories/codegen/</loc>
+ <lastmod>2024-10-30T17:27:00Z</lastmod>
+ </url>
+ <url>
+ <loc>https://impala.apache.org/blog/index-1.html</loc>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/anatomy-of-reading-apache-parquet-files-from-the-apache-impala-perspective/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
+ </url>
+ <url>
+
<loc>https://impala.apache.org/blog/posts/codegen-cache-for-low-latency-queries/</loc>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/healing-iceberg-tables-with-impala/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/impala-25-performance-overview/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/impala-a-modern-open-source-sql-engine-for-hadoop/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/impala-blog-coming-soon/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/impalas-living-on-iceberg/</loc>
- <lastmod>2024-10-17T19:03:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/intelligent-utilization-aware-autoscaling-for-impala-virtual-compute-clusters/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:30:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/lets-see-how-fast-impala-runs-on-iceberg/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/nested-types-in-impala/</loc>
- <lastmod>2024-10-14T23:57:00Z</lastmod>
+ <lastmod>2024-10-30T17:23:00Z</lastmod>
</url>
<url>
<loc>https://impala.apache.org/blog/posts/this-impala-not-only-reads-but-modifies-and-optimizes-iceberg-tables/</loc>
- <lastmod>2024-10-15T16:24:00Z</lastmod>
+ <lastmod>2024-10-30T17:26:00Z</lastmod>
</url>
</urlset>
\ No newline at end of file
diff --git a/blog/sitemapindex.xml b/blog/sitemapindex.xml
index 91c916c80..54122ce7e 100644
--- a/blog/sitemapindex.xml
+++ b/blog/sitemapindex.xml
@@ -7,6 +7,6 @@
http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
<sitemap>
<loc>https://impala.apache.org/blog/sitemap.xml</loc>
- <lastmod>2024-10-17T19:05:00Z</lastmod>
+ <lastmod>2024-10-30T18:32:00Z</lastmod>
</sitemap>
</sitemapindex>
\ No newline at end of file
diff --git a/nikola_site_generator/conf.py b/nikola_site_generator/conf.py
index 281b600e1..581d9920c 100644
--- a/nikola_site_generator/conf.py
+++ b/nikola_site_generator/conf.py
@@ -498,6 +498,10 @@ HIDDEN_CATEGORIES = []
# Tag pages will still be generated.
HIDDEN_AUTHORS = ['Guest']
+# Allow multiple, comma-separated authors for a post? (Requires theme support,
+# present in built-in themes)
+MULTIPLE_AUTHORS_PER_POST = True
+
# Final location for the main blog page and sibling paginated pages is
# output / TRANSLATION[lang] / INDEX_PATH / index-*.html
# INDEX_PATH = ""
diff --git a/nikola_site_generator/images/codegen-cache-perf.png
b/nikola_site_generator/images/codegen-cache-perf.png
new file mode 100644
index 000000000..2287acac0
Binary files /dev/null and
b/nikola_site_generator/images/codegen-cache-perf.png differ
diff --git a/nikola_site_generator/images/query-exec.png
b/nikola_site_generator/images/query-exec.png
new file mode 100644
index 000000000..21d97a038
Binary files /dev/null and b/nikola_site_generator/images/query-exec.png differ
diff --git
a/nikola_site_generator/posts/codegen-cache-for-low-latency-queries.md
b/nikola_site_generator/posts/codegen-cache-for-low-latency-queries.md
new file mode 100644
index 000000000..9587d9763
--- /dev/null
+++ b/nikola_site_generator/posts/codegen-cache-for-low-latency-queries.md
@@ -0,0 +1,142 @@
+<!--
+.. title: Codegen cache for low latency queries
+.. slug: codegen-cache-for-low-latency-queries
+.. date: 2024-10-29 14:00:00 UTC-07:00
+.. tags: codegen
+.. category: blogs
+.. link:
+.. description: Making fast queries faster in Impala
+.. type: text
+.. author: Michael Smith, Yida Wu, David Rorke, Abhishek Rawat
+-->
+
+Apache Impala is a high-performance engine - written primarily in C++ - for
executing low-latency SQL queries. At a high level, Impala generates a
distributed query plan (first two phases in yellow), admits the query once
sufficient capacity is available, and finally executes the query. For a more
in-depth description of these phases please refer to [Impala: A Modern,
Open-Source SQL Engine for
Hadoop](https://www.cidrdb.org/cidr2015/Papers/CIDR15_Paper28.pdf).
+
+{: style="display: block; margin: 0
auto;"}
+
+During Distributed Execution, each fragment of the query plan is run on one or
more Impala executors, with a degree of parallelism determined by the planner.
A fragment is a distinct block of work that can be executed on a single node,
and often comprises steps such as scanning and filtering rows from files (or
other data sources), hashing that data to group or order it, and sending it to
other executors via an exchange for distributed aggregation.
+
+### Code Generation
+
+The steps taken within each fragment comprise the bulk of the work an executor
does, and databases use different techniques to optimize that work. The actual
operations needed will depend on the types of the specific columns being
manipulated, which may be simple scalar types or complex data such as structs
and arrays. At the beginning of executing each fragment, Impala leverages the
[LLVM project](https://llvm.org) to generate machine code specific to the steps
and columns in the fragment.
+
+Code generation can dramatically speed up the operations done on each row, but
has an initial overhead in generating the code that offsets that benefit. This
initial overhead of generating code becomes relevant to sub-second and
low-second queries, because a codegen time of, say, 100-250 ms is significant if
the query only takes 2 seconds to finish. Typical examples of such queries are
queries on Kudu tables that finish in seconds. Historically we recommended users to either
`set DISABLE_CODEGEN [...]
+
+`DISABLE_CODEGEN_ROWS_THRESHOLD` currently estimates the number of rows being
processed on each of the nodes and then decides whether codegen should be
disabled. There are scenarios where the planner estimate is incorrect or the
query is complex and codegen would have actually helped.
+
+To help mitigate the cost of codegen for short-running queries that are run
repeatedly, we've introduced a new codegen caching feature. With codegen cache
enabled, code generation for queries will be cached, and subsequent runs will
be faster by not needing to regenerate that code.
+
+Using Cloudera Data Warehouse 1.9.2 with Runtime 2024.0.18.0-206 on AWS EC2
r5d.4xlarge instances, we performed a TPC-DS 1 TB benchmark with 10 executors
to evaluate codegen cache performance. Across the whole test suite we saw
geometric mean times improve by 4.8%. Since we expect codegen cache to help
more with faster queries, we isolate the queries that executed in less than 2s:
+
+{: style="display:
block; margin: 0 auto;"}
+
+For these queries, we see a geometric mean improvement of 22%, significantly
improving the performance of low latency queries by eliminating most of the
code generation time.
+
+### The Codegen Cache
+
+[Caching Codegen
Functions](https://docs.cloudera.com/cdw-runtime/cloud/impala-reference/topics/impala-codegencaching.html)
has been added to reduce the cost of code generation when repeating queries or
running substantially similar queries by caching the results of code
generation. The codegen cache in Impala works at the fragment level, meaning
that it caches and reuses the machine code for specific fragments of a query.
+
+When Impala generates code using LLVM and the codegen cache is enabled, it
will store the generated objects using [LLVM’s Object
Caching](https://blog.llvm.org/2013/08/object-caching-with-kaleidoscope.html).
Impala goes through several steps during codegen:
+
+1. Load pre-parsed and partially optimized Impala library functions so that
new code generation can reference them.
+1. Define functions representing the operations to be performed using LLVM’s
intermediate representation (IR).
+1. Prune unused library functions loaded in step (1).
+1. Run LLVM’s builtin passes to optimize the IR generated through steps 1-3.
+1. Generate machine code from the optimized IR.
+
+The most time-consuming of these steps are the optimization passes and
machine code generation. When using the codegen cache, Impala performs steps
1-3, then constructs a key based on a serialization of the IR. It then looks
for a match for the key in the codegen cache; if found, the result will be a
machine code object that’s ready for immediate use; otherwise steps 4 and 5 are
performed to generate machine code, which will then be stored to the codegen
cache and used.
+
+The codegen cache stores all objects in-memory. Its capacity is determined by
`CODEGEN_CACHE_CAPACITY`. When the cache is full, it evicts the Least-Recently
Used (LRU) object to make space for new entries.
+
+#### Example of Caching Codegen Functions
+
+Consider the following table:
+
+ create table sales_data (product_id int, category string, sales double);
+
+We run two similar queries sequentially:
+
+1. `select category, sum(sales) from sales_data where category = 'a' group by
category;`
+1. `select category, sum(sales) from sales_data where category = 'b' group by
category;`
+
+After running Query 1, the query profile shows the plan as follows, with zero
cached functions and a total codegen compilation time of several dozen
milliseconds for each fragment.
+
+```
+F02:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+...
+04:EXCHANGE [UNPARTITIONED]
+...
+F01:PLAN FRAGMENT [HASH(category)] hosts=1 instances=1
+03:AGGREGATE [FINALIZE]
+...
+02:EXCHANGE [HASH(category)]
+...
+F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
+01:AGGREGATE [STREAMING]
+...
+00:SCAN HDFS [default.sales_data, RANDOM]
+...
+ Fragment F02:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 2 (2)
+ ...
+ - TotalTime: 52.000ms
+ Fragment F01:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 100.000ms
+ Fragment F00:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 116.000ms
+```
+
+After running Query 2, the functions of fragments F02 and F01 are successfully
loaded from the codegen cache, because these fragments are identical in both
queries, largely reducing the total codegen compilation time. However,
Fragment F00 does not hit the codegen cache because different predicates are
used in the two queries, like in our case, `category = 'a'` vs. `category =
'b'`. As a result, the codegen functions in the corresponding scan nodes are
treated as distinct in the current [...]
+
+```
+ Fragment F02:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 2 (2)
+ ...
+ - NumOptimizedFunctions: 2 (2)
+ ...
+ - TotalTime: 32.000ms
+ Fragment F01:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 20 (20)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 40.000ms
+ Fragment F00:
+ CodeGen:
+ ...
+ - NumCachedFunctions: 0 (0)
+ ...
+ - NumOptimizedFunctions: 20 (20)
+ ...
+ - TotalTime: 112.000ms
+```
+
+Note that native UDFs are not supported by the codegen cache: if a fragment
contains any native UDF, the codegen of that fragment won't be cached.
+
+### Summary
+
+The codegen cache has been supported and enabled by default since Impala 4.3.
By setting the flag file option `CODEGEN_CACHE_CAPACITY`, you can adjust the
amount of memory used for the codegen cache.
+
+Interested in contributing? We have future work planned here for codegen
caching - [IMPALA-13187](https://issues.apache.org/jira/browse/IMPALA-13187)
+
+_Reblogged with edits from [Engineering@Cloudera on
Medium](https://medium.com/engineering-cloudera/codegen-cache-for-low-latency-queries-47d5fd947fcf)_
diff --git a/nikola_site_generator/posts/impalas-living-on-iceberg.md
b/nikola_site_generator/posts/impalas-living-on-iceberg.md
index 04a2ceecf..b246c2bb0 100644
--- a/nikola_site_generator/posts/impalas-living-on-iceberg.md
+++ b/nikola_site_generator/posts/impalas-living-on-iceberg.md
@@ -1,5 +1,5 @@
<!--
-.. title: Impala's Living on Iceberg
+.. title: Impalas living on Iceberg
.. slug: impalas-living-on-iceberg
.. date: 2024-10-07 16:00:00 UTC-06:00
.. tags: ccna24
diff --git
a/nikola_site_generator/themes/impala-theme/assets/css/additional_styles.css
b/nikola_site_generator/themes/impala-theme/assets/css/additional_styles.css
index e7ae1cc58..6eb731774 100644
--- a/nikola_site_generator/themes/impala-theme/assets/css/additional_styles.css
+++ b/nikola_site_generator/themes/impala-theme/assets/css/additional_styles.css
@@ -22,6 +22,10 @@ body {
padding-bottom: 40px;
}
+.byline a:not(:last-child):after {
+ content: ",";
+}
+
/* Custom container */
.container-narrow {
margin: 0 auto;
diff --git a/nikola_site_generator/themes/impala-theme/templates/index.tmpl
b/nikola_site_generator/themes/impala-theme/templates/index.tmpl
index 646c45f70..e561badeb 100644
--- a/nikola_site_generator/themes/impala-theme/templates/index.tmpl
+++ b/nikola_site_generator/themes/impala-theme/templates/index.tmpl
@@ -43,7 +43,11 @@ under the License.
<h3 class="p-name entry-title"><a href="{{ post.permalink() }}"
class="u-url">{{ post.title()|e }}</a></h3>
<div class="metadata">
<p class="byline author vcard"><span class="byline-name fn">
- {% if author_pages_generated %}
+ {% if author_pages_generated and multiple_authors_per_post %}
+ {%- for author in post.authors() %}
+ <a href="{{ _link('author', author) }}">{{ author|e }}</a>
+ {% endfor %}
+ {% elif author_pages_generated %}
<a href="{{ _link('author', post.author()) }}">{{
post.author()|e }}</a>
{% else %}
{{ post.author()|e }}
diff --git
a/nikola_site_generator/themes/impala-theme/templates/post_header.tmpl
b/nikola_site_generator/themes/impala-theme/templates/post_header.tmpl
index 0ec24c3bc..6266eb474 100644
--- a/nikola_site_generator/themes/impala-theme/templates/post_header.tmpl
+++ b/nikola_site_generator/themes/impala-theme/templates/post_header.tmpl
@@ -39,7 +39,11 @@ under the License.
<div class="metadata">
<p class="byline author vcard">
<span class="byline-name fn">
- {% if author_pages_generated %}
+ {% if author_pages_generated and multiple_authors_per_post
%}
+ {%- for author in post.authors() %}
+ <a href="{{ _link('author', author) }}">{{ author|e
}}</a>
+ {% endfor %}
+ {% elif author_pages_generated %}
<a href="{{ _link('author', post.author()) }}">{{
post.author()|e }}</a>
{% else %}
{{ post.author()|e }}