This is an automated email from the ASF dual-hosted git repository. palashc pushed a commit to branch PHOENIX-7868 in repository https://gitbox.apache.org/repos/asf/phoenix-adapters.git
commit efab533b2afc10947f39fed0529c5dc595a17c50 Author: Palash Chauhan <[email protected]> AuthorDate: Tue May 26 21:53:00 2026 -0700 PHOENIX-7868 : Docker setup --- .dockerignore | 20 ++ README.md | 33 +++ docker/Dockerfile.hbase-phoenix | 63 +++++ docker/Dockerfile.phoenix-adapters | 89 +++++++ docker/README.md | 333 ++++++++++++++++++++++++++ docker/conf/hbase/hbase-env.sh | 16 ++ docker/conf/hbase/hbase-site.xml | 58 +++++ docker/conf/phoenix-adapters/hbase-site.xml | 21 ++ docker/docker-compose.yml | 186 ++++++++++++++ docker/scripts/hbase-entrypoint.sh | 55 +++++ docker/scripts/phoenix-adapters-entrypoint.sh | 43 ++++ docker/scripts/smoke.sh | 311 ++++++++++++++++++++++++ 12 files changed, 1228 insertions(+) diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5e8720b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,20 @@ +**/target/ +**/logs/ +**/*.log +**/*.log.* +**/dynamodb-local-metadata.json +**/heap-dumps/ + +**/*.tar.gz +**/*.tar.bz2 +**/*.zip + +.idea/ +.vscode/ +.cursor/ +.DS_Store + +.git/ +.gitignore + +docker/README.md diff --git a/README.md b/README.md index f48d461..9d51359 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,39 @@ The Phoenix DynamoDB REST service is fully compatible with AWS SDKs. You can con port 8842 with zk-quorum localhost:2181. Alternative to `-z <zk-quorum>` is env variable `ZOO_KEEPER_QUORUM`. +#### One-shot Docker setup (recommended for first-time users) + +Skip steps 1-2 above with the bundled Docker cluster. From a fresh clone: + +**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH` +(`brew install jq` on macOS). + +```bash +# 1. Bring up the full stack at the versions pinned in pom.xml and BLOCK +# until every container reports healthy (REST is ~30-60s on cold start). +# First time: ~8-12 min total; subsequent runs are cached. +docker compose -f docker/docker-compose.yml up -d --build --wait + +# 2. Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams). 
+bash docker/scripts/smoke.sh +# -> "Result: 20 checks PASSED across 18 API calls" + +# 3. Use it. The DynamoDB-compatible endpoint is at http://localhost:8842 . +# Point any AWS SDK at it (Java/Python/Node.js snippets in +# phoenix-ddb-rest/README.md), or hit it with curl: +curl -s -X POST http://localhost:8842/ \ + -H 'Content-Type: application/x-amz-json-1.0' \ + -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}' + +# 4. Tear down when you're done. +docker compose -f docker/docker-compose.yml down -v +``` + +See [`docker/README.md`](docker/README.md) for the full reference: port +mappings, the developer inner loop for code changes, the smoke-test +breakdown, troubleshooting, and how to run the REST server outside +Docker against the dockerized cluster. + ### Building Distribution Tarball To build a distribution tarball that includes all components: diff --git a/docker/Dockerfile.hbase-phoenix b/docker/Dockerfile.hbase-phoenix new file mode 100644 index 0000000..d92e1aa --- /dev/null +++ b/docker/Dockerfile.hbase-phoenix @@ -0,0 +1,63 @@ +# syntax=docker/dockerfile:1 +FROM eclipse-temurin:8-jdk-jammy + +ARG HBASE_VERSION=2.5.14 +ARG HBASE_FLAVOR=hadoop3 +ARG PHOENIX_HBASE_LINE=2.5 +ARG PHOENIX_VERSION=5.3.1 + +ENV HBASE_VERSION=${HBASE_VERSION} \ + HBASE_FLAVOR=${HBASE_FLAVOR} \ + PHOENIX_HBASE_LINE=${PHOENIX_HBASE_LINE} \ + PHOENIX_VERSION=${PHOENIX_VERSION} \ + JAVA_HOME=/opt/java/openjdk \ + HBASE_HOME=/opt/hbase \ + HBASE_CONF_DIR=/opt/hbase/conf \ + PHOENIX_HOME=/opt/phoenix \ + HBASE_MANAGES_ZK=false \ + PATH=/opt/hbase/bin:/opt/phoenix/bin:/opt/java/openjdk/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + bash curl ca-certificates netcat-openbsd procps tini less; \ + rm -rf /var/lib/apt/lists/* + +RUN set -eux; \ + mkdir -p "${HBASE_HOME}"; \ + curl -fSL --retry 5 --retry-delay 5 \ + 
"https://archive.apache.org/dist/hbase/${HBASE_VERSION}/hbase-${HBASE_VERSION}-${HBASE_FLAVOR}-bin.tar.gz" \ + -o /tmp/hbase.tar.gz; \ + tar -xzf /tmp/hbase.tar.gz -C "${HBASE_HOME}" --strip-components=1; \ + rm /tmp/hbase.tar.gz; \ + mkdir -p /var/log/hbase /var/run/hbase + +# phoenix-server JAR is copied into HBase's lib so the coprocessors and +# the IndexedWALEditCodec are visible to both the master and every RS. +RUN set -eux; \ + mkdir -p "${PHOENIX_HOME}"; \ + curl -fSL --retry 5 --retry-delay 5 \ + "https://archive.apache.org/dist/phoenix/phoenix-${PHOENIX_VERSION}/phoenix-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}-bin.tar.gz" \ + -o /tmp/phoenix.tar.gz; \ + tar -xzf /tmp/phoenix.tar.gz -C "${PHOENIX_HOME}" --strip-components=1; \ + rm /tmp/phoenix.tar.gz; \ + cp "${PHOENIX_HOME}/phoenix-server-hbase-${PHOENIX_HBASE_LINE}-${PHOENIX_VERSION}.jar" "${HBASE_HOME}/lib/" + +# Kept below the tarball downloads to preserve their (multi-hundred-MB) cache. +# python3 is required by /opt/phoenix/bin/sqlline.py. +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends python3; \ + rm -rf /var/lib/apt/lists/*; \ + ln -sf /usr/bin/python3 /usr/local/bin/python + +COPY conf/hbase/hbase-site.xml ${HBASE_HOME}/conf/hbase-site.xml +COPY conf/hbase/hbase-env.sh ${HBASE_HOME}/conf/hbase-env.sh + +COPY scripts/hbase-entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + +WORKDIR /opt + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"] +CMD ["help"] diff --git a/docker/Dockerfile.phoenix-adapters b/docker/Dockerfile.phoenix-adapters new file mode 100644 index 0000000..abb78f0 --- /dev/null +++ b/docker/Dockerfile.phoenix-adapters @@ -0,0 +1,89 @@ +# syntax=docker/dockerfile:1 +# +# Build context: project root (the Maven reactor needs every module). +# +FROM maven:3.9-eclipse-temurin-8 AS builder + +WORKDIR /workspace + +# Copy poms first to maximise dep-layer cache hits on rebuild. 
+COPY pom.xml ./ +COPY phoenix-ddb-utils/pom.xml phoenix-ddb-utils/pom.xml +COPY phoenix-ddb-rest/pom.xml phoenix-ddb-rest/pom.xml +COPY phoenix-ddb-assembly/pom.xml phoenix-ddb-assembly/pom.xml +COPY coverage-report/pom.xml coverage-report/pom.xml + +# `|| true` because the cross-module reactor can't resolve siblings yet; +# this step is only here to warm ~/.m2. +RUN --mount=type=cache,target=/root/.m2 \ + mvn -B -q -DskipTests \ + -pl phoenix-ddb-utils,phoenix-ddb-rest,phoenix-ddb-assembly -am \ + dependency:go-offline || true + +COPY phoenix-ddb-utils phoenix-ddb-utils +COPY phoenix-ddb-rest phoenix-ddb-rest +COPY phoenix-ddb-assembly phoenix-ddb-assembly +COPY coverage-report coverage-report +COPY bin bin +COPY conf conf +COPY README.md DDB_API_REFERENCE.md ./ + +RUN --mount=type=cache,target=/root/.m2 \ + mvn -B -DskipTests \ + -pl phoenix-ddb-assembly -am \ + clean package + +RUN set -eux; \ + tarball=$(ls phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz | head -n1); \ + cp "${tarball}" /tmp/phoenix-adapters-bin.tar.gz + +FROM eclipse-temurin:8-jdk-jammy + +ENV JAVA_HOME=/opt/java/openjdk \ + PHOENIX_ADAPTERS_HOME=/opt/phoenix-adapters \ + PHOENIX_ADAPTERS_CONF_DIR=/opt/phoenix-adapters/conf \ + PHOENIX_ADAPTERS_LOG_DIR=/var/log/phoenix-adapters \ + PHOENIX_ADAPTERS_PID_DIR=/var/run/phoenix-adapters \ + PHOENIX_REST_PORT=8842 \ + ZOO_KEEPER_QUORUM=zookeeper:2181 \ + HBASE_MASTER_HOST=hbase-master \ + HBASE_MASTER_PORT=16000 + +RUN set -eux; \ + apt-get update; \ + apt-get install -y --no-install-recommends \ + bash curl ca-certificates netcat-openbsd tini procps; \ + rm -rf /var/lib/apt/lists/*; \ + mkdir -p "${PHOENIX_ADAPTERS_LOG_DIR}" "${PHOENIX_ADAPTERS_PID_DIR}" + +COPY --from=builder /tmp/phoenix-adapters-bin.tar.gz /tmp/phoenix-adapters-bin.tar.gz + +RUN set -eux; \ + mkdir -p "${PHOENIX_ADAPTERS_HOME}"; \ + tar -xzf /tmp/phoenix-adapters-bin.tar.gz -C "${PHOENIX_ADAPTERS_HOME}" --strip-components=1; \ + rm /tmp/phoenix-adapters-bin.tar.gz; \ 
+ chmod -R +x "${PHOENIX_ADAPTERS_HOME}/bin"; \ + # The assembly ships a mix of hadoop-common 3.3.6 (declared in pom.xml) + # and hadoop-hdfs/yarn/mapreduce 3.4.x (transitive from phoenix-core-client + # via hbase-server:2.5.14-hadoop3). The 3.4.x jars register FileSystem + # impls that reference `WithErasureCoding`, a class only present in + # hadoop-common 3.4.x. When HBase returns a remote exception during + # bootstrap, the client's classloader tries to enumerate FileSystem + # impls, hits NoClassDefFoundError, and poisons the JVM. The REST + # server only talks to HBase via RPC and never opens HDFS directly, + # so we strip the 3.4.x hadoop client jars to break the cycle. + rm -f "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-"*.jar \ + "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-hdfs-client-"*.jar \ + "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-yarn-"*.jar \ + "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-mapreduce-client-"*.jar \ + "${PHOENIX_ADAPTERS_HOME}/lib/hadoop-distcp-"*.jar + +# Client-side WAL codec / RPC controller must match the server cluster. +COPY docker/conf/phoenix-adapters/hbase-site.xml ${PHOENIX_ADAPTERS_CONF_DIR}/hbase-site.xml + +COPY docker/scripts/phoenix-adapters-entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + +EXPOSE 8842 + +ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/entrypoint.sh"] diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..618bee1 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,333 @@ +# Local Docker Cluster for Phoenix Adapters + +Brings up the full dependency stack (Hadoop / ZooKeeper / HBase / Phoenix) +required to run **phoenix-adapters** on your laptop. Uses upstream images +where they exist; custom only where they don't. 
+ +| Component | Version | Image | +| --- | --- | --- | +| Apache ZooKeeper | 3.8.4 | [`library/zookeeper:3.8.4`](https://hub.docker.com/_/zookeeper) (Docker Official) | +| Apache Hadoop (HDFS) | 3.3.6 | [`apache/hadoop:3.3.6`](https://hub.docker.com/r/apache/hadoop) (Apache convenience build) | +| Apache HBase | 2.5.14-hadoop3 | `phoenix-adapters/hbase-phoenix:latest` (custom) | +| Apache Phoenix | 5.3.1 (phoenix-hbase-2.5) | bundled into `phoenix-adapters/hbase-phoenix` | +| Phoenix Adapters REST | this repo | `phoenix-adapters/rest:latest` (custom) | + +Versions are kept in lockstep with the top-level [`pom.xml`](../pom.xml). + +> **Apple Silicon.** `apache/hadoop:3.3.6` is amd64-only; the compose file +> pins `platform: linux/amd64` so the NameNode/DataNode run under Rosetta +> emulation. Slower than native, but functional. + +## Layout + +``` +docker/ +├── Dockerfile.hbase-phoenix # HBase 2.5.14 + Phoenix 5.3.1 +├── Dockerfile.phoenix-adapters # Multi-stage build of the REST server +├── docker-compose.yml +├── conf/ +│ ├── hbase/{hbase-site.xml,hbase-env.sh} +│ └── phoenix-adapters/hbase-site.xml # Client-side overrides +└── scripts/ + ├── hbase-entrypoint.sh # hbase-master, hbase-regionserver + ├── phoenix-adapters-entrypoint.sh + └── smoke.sh # End-to-end DDB validation suite +``` + +ZooKeeper and Hadoop config lives entirely in `docker-compose.yml` as env +vars that the upstream images template into XML. + +## Quick start + +**Prerequisites:** Docker Desktop running; `jq` and `curl` on `PATH` +(`brew install jq` on macOS). + +From the **project root**: + +```bash +# 1. Bring up the full stack (ZK + HDFS + HBase+Phoenix + REST) and BLOCK +# until every service reports healthy (REST takes ~30-60s on a cold +# start because Phoenix has to bootstrap SYSTEM.* tables). +# First time: ~8-12 min (pulls upstream images + builds HBase/Phoenix + REST). +# Subsequent runs: cached. +docker compose -f docker/docker-compose.yml up -d --build --wait + +# 2. 
Validate it works end-to-end (CRUD + UpdateItem + BatchWriteItem + streams). +bash docker/scripts/smoke.sh +# -> "Result: 20 checks PASSED across 18 API calls" + +# 3. Use it. The DynamoDB-compatible REST endpoint is at http://localhost:8842 . +# Point any AWS SDK at it (Java/Python/Node.js snippets in +# phoenix-ddb-rest/README.md), or hit it directly with curl: +curl -s -X POST http://localhost:8842/ \ + -H 'Content-Type: application/x-amz-json-1.0' \ + -H 'X-Amz-Target: DynamoDB_20120810.ListTables' -d '{}' + +# 4. Tear down when you're done. +docker compose -f docker/docker-compose.yml down # keep volumes +docker compose -f docker/docker-compose.yml down -v # also wipe HDFS + ZK +``` + +### URLs + +| URL | Service | +| --- | --- | +| http://localhost:8842 | **Phoenix Adapters REST (DynamoDB-compatible)** | +| http://localhost:9870 | HDFS NameNode UI | +| http://localhost:9864 | HDFS DataNode UI | +| http://localhost:16010 | HBase Master UI | +| http://localhost:16030 | HBase RegionServer UI | + +Two host ports are remapped because their defaults often collide on dev +machines (macOS AirPlay on 9000, a locally installed Kafka/ZK on 2181): + +| Service | Container | Host | +| --- | --- | --- | +| HDFS NameNode RPC | `namenode:9000` | `localhost:19000` | +| ZooKeeper client | `zookeeper:2181` | `localhost:12181` | + +Inter-container traffic still uses the standard ports. + +### Bring up just the cluster (no REST) + +```bash +docker compose -f docker/docker-compose.yml up -d --build \ + zookeeper namenode datanode hbase-master hbase-regionserver +``` + +## Validation suite + +`docker/scripts/smoke.sh` exercises every supported DynamoDB API against +the running REST server and asserts the expected behaviour. It prints +each request, response, and assertion as it runs. 
+ +```bash +docker compose -f docker/docker-compose.yml up -d --build --wait +bash docker/scripts/smoke.sh +``` + +Exits `0` on full pass; exits non-zero on the first failed assertion and +prints the offending response. + +| Step | API | +| --- | --- | +| 1 | `ListTables` (baseline) | +| 2 | `CreateTable` (with `StreamSpecification` enabled, `NEW_AND_OLD_IMAGES`) | +| 3 | `DescribeTable` | +| 4 | `PutItem` (`id=a`) | +| 5 | `UpdateItem` (`SET score, bonus`, `ReturnValues=ALL_NEW`) | +| 6 | `GetItem` | +| 7 | `PutItem` (`id=b`) | +| 8 | `Scan` | +| 9 | `Query` | +| 10 | `DeleteItem` | +| 11 | `Scan` (after delete) | +| 12 | `BatchWriteItem` (mixed put + delete) | +| 13 | `Scan` paginated (drains all pages) | +| 14 | `ListStreams` | +| 15 | `DescribeStream` (polls until `StreamStatus == ENABLED`) | +| 16 | `GetShardIterator` (`TRIM_HORIZON`) | +| 17 | `GetRecords` (drains all pages) | +| 18 | `DeleteTable` | + +## Poking around the cluster + +HBase shell: + +```bash +docker compose -f docker/docker-compose.yml exec hbase-master hbase shell +``` + +```text +status +list +create 'demo', 'cf' +put 'demo', 'r1', 'cf:c1', 'hello' +scan 'demo' +``` + +Phoenix sqlline: + +```bash +docker compose -f docker/docker-compose.yml exec hbase-master \ + /opt/phoenix/bin/sqlline.py zookeeper:2181 +``` + +```sql +!tables +CREATE TABLE IF NOT EXISTS t1 (id BIGINT PRIMARY KEY, name VARCHAR); +UPSERT INTO t1 VALUES (1, 'phoenix-adapters'); +SELECT * FROM t1; +``` + +## Developer inner loop: code change → live endpoint + +``` +phoenix-ddb-rest/src/**.java + │ (1) edit on host + ▼ +docker compose ... 
up -d --build phoenix-adapters-rest + ├── stage 1: mvn package -DskipTests (BuildKit caches ~/.m2) + ├── stage 1 output: phoenix-ddb-assembly/target/*-bin.tar.gz + └── stage 2: temurin runtime extracts that tarball + │ + ▼ +http://localhost:8842/ (new code, live) +``` + +The cluster (ZK + HDFS + HBase) keeps running across REST rebuilds, and +HBase data persists across full `down`/`up` cycles (unless you pass `-v`). + +### The loop + +1. Edit code in `phoenix-ddb-rest/src/...` or `phoenix-ddb-utils/src/...`. +2. *(Optional)* sanity-check the compile on the host: + + ```bash + mvn -B -DskipTests -pl phoenix-ddb-rest -am package + ``` + +3. Rebuild and recreate just the REST container: + + ```bash + docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest + ``` + + No-dep-change rebuilds typically take 30-60 s on a warm cache. +4. Watch logs: + + ```bash + docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest + ``` +5. Hit the endpoint and verify. + +### Quick reference + +| Task | Command | +| --- | --- | +| Rebuild REST + restart it | `docker compose -f docker/docker-compose.yml up -d --build phoenix-adapters-rest` | +| Restart REST (no code change) | `docker compose -f docker/docker-compose.yml restart phoenix-adapters-rest` | +| Tail REST logs | `docker compose -f docker/docker-compose.yml logs -f phoenix-adapters-rest` | +| Tail HBase logs | `docker compose -f docker/docker-compose.yml logs -f hbase-master hbase-regionserver` | +| HBase shell | `docker compose -f docker/docker-compose.yml exec hbase-master hbase shell` | +| Phoenix sqlline | `docker compose -f docker/docker-compose.yml exec hbase-master /opt/phoenix/bin/sqlline.py zookeeper:2181` | +| List containers | `docker compose -f docker/docker-compose.yml ps` | +| Stop (keep data) | `docker compose -f docker/docker-compose.yml down` | +| Stop + wipe data | `docker compose -f docker/docker-compose.yml down -v` | + +### Edge cases + +| Situation | What to do | +| --- | --- | +| Changed 
`conf/hbase/hbase-site.xml` or `hbase-env.sh` | `docker compose ... up -d --build hbase-master hbase-regionserver`. Existing tables survive. | +| Bumped `hbase.version` / `phoenix.version` in `pom.xml` | Bump matching `ARG`s in `Dockerfile.hbase-phoenix`, then `--build hbase-master hbase-regionserver phoenix-adapters-rest`. Often pair with `down -v`. | +| Added a Maven dep to `phoenix-ddb-rest/pom.xml` | `--build phoenix-adapters-rest`. New dep downloads once; cache warms after. | +| Clean slate | `docker compose ... down -v` then `up -d --build`. | +| Code doesn't seem picked up | You ran `restart` instead of `up --build`. `restart` does not rebuild. | + +### Pre-PR checklist + +```bash +# 1. Host-side compile + unit tests (no cluster required). +mvn -B clean install -DskipITs + +# 2. End-to-end validation: fresh stack + full DDB round-trip including streams. +docker compose -f docker/docker-compose.yml down -v +docker compose -f docker/docker-compose.yml up -d --build --wait +bash docker/scripts/smoke.sh + +# 3. Tear it down. +docker compose -f docker/docker-compose.yml down -v +``` + +If `smoke.sh` finishes with `Result: 20 checks PASSED across 18 API calls`, +your change is wire-compatible end to end through Phoenix on dockerized +HBase across CRUD, batch, and the change-stream chain. + +## Running the REST server outside Docker + +1. Bring up only the cluster services. +2. Add cluster hostnames to `/etc/hosts` (HBase advertises hostnames over ZK): + + ``` + 127.0.0.1 zookeeper namenode datanode hbase-master hbase-regionserver + ``` + +3. 
Start the REST server pointing at the dockerized ZooKeeper: + + ```bash + mvn -DskipTests clean package + tar xzf phoenix-ddb-assembly/target/phoenix-adapters-*-bin.tar.gz -C /tmp + cd /tmp/phoenix-adapters-* + export JAVA_HOME=$(/usr/libexec/java_home -v 1.8) # macOS example + export PHOENIX_ADAPTERS_HOME=$(pwd) + bin/phoenix-adapters rest foreground_start -p 8842 -z localhost:12181 + ``` + +## Phoenix tuning baked into the image + +[`docker/conf/hbase/hbase-site.xml`](conf/hbase/hbase-site.xml) enables what +Phoenix 5.x needs for secondary indexes, DDL events, and the multi-priority +RPC controller: + +| Property | Value | +| --- | --- | +| `hbase.coprocessor.master.classes` | `…PhoenixMasterObserver` | +| `hbase.coprocessor.regionserver.classes` | `…PhoenixRegionServerEndpoint` | +| `hbase.regionserver.wal.codec` | `…IndexedWALEditCodec` | +| `hbase.region.server.rpc.scheduler.factory.class` | `…PhoenixRpcSchedulerFactory` | +| `hbase.rpc.controllerfactory.class` | `…ServerRpcControllerFactory` | +| `phoenix.task.handling.interval.ms` | `10` | +| `phoenix.task.handling.initial.delay.ms` | `1` | + +`phoenix-server-hbase-2.5-5.3.1.jar` is copied into `${HBASE_HOME}/lib/` so +the coprocessors and WAL codec are visible to master and every RegionServer. + +## Why upstream images for ZK + Hadoop but not HBase? + +| Component | Decision | Reason | +| --- | --- | --- | +| ZooKeeper 3.8.4 | Upstream `zookeeper:3.8.4` | Docker Official, exact version, multi-arch. | +| Hadoop 3.3.6 | Upstream `apache/hadoop:3.3.6` | Apache convenience build at the exact version. amd64-only, runs under emulation on Apple Silicon. | +| HBase 2.5.14-hadoop3 | Custom | No official Apache image; community images don't cover `2.5.14-hadoop3`. | +| Phoenix 5.3.1 | Custom (layered on HBase) | No Phoenix image anywhere; server JAR must be on HBase's classpath. | + +## Troubleshooting + +* **NameNode unhealthy on first start.** First start formats the NameNode + via `ENSURE_NAMENODE_DIR`. 
Watch with `docker compose ... logs -f namenode`. +* **HBase Master `RegionTooBusyException` / `NotServingRegionException`.** Wait ~30 s + after RegionServer comes up; Phoenix bootstraps `SYSTEM.*` tables on its + first connection and the REST server retries transparently. +* **REST exits with `NoClassDefFoundError: org/apache/hadoop/fs/WithErasureCoding`.** + The phoenix-ddb-assembly tarball ships `hadoop-common:3.3.6` (from + `pom.xml`) alongside `hadoop-hdfs:3.4.x` / `hadoop-yarn:3.4.x` + (transitive from `phoenix-core-client`). The 3.4.x JARs register + FileSystem impls that need `WithErasureCoding`, which only exists in + hadoop-common 3.4+. When HBase returns a remote exception during + bootstrap, the client tries to enumerate FileSystem impls, hits + `NoClassDefFoundError`, and poisons the JVM. The REST image's + `Dockerfile.phoenix-adapters` strips the 3.4.x `hadoop-hdfs*`, + `hadoop-yarn-*`, `hadoop-mapreduce-client-*`, and `hadoop-distcp-*` + jars after extracting the tarball — the REST server only talks to + HBase via RPC and never opens HDFS directly, so removing them is safe. + If this error reappears, check that those `rm -f` lines in + `Dockerfile.phoenix-adapters` weren't dropped. +* **`Datanode denied communication with namenode`.** Cluster ID mismatch. + Run `docker compose ... down -v`, then bring the stack back up. +* **`platform mismatch` warnings on Apple Silicon.** Expected for the + Hadoop containers (amd64 image, emulated). No action needed. + +## Customising versions + +HBase / Phoenix versions are `ARG`s on `Dockerfile.hbase-phoenix`: + +```bash +docker compose -f docker/docker-compose.yml build \ + --build-arg HBASE_VERSION=2.5.13 \ + --build-arg PHOENIX_VERSION=5.3.0 \ + hbase-master +``` + +Hadoop and ZooKeeper versions are pinned by tag in `docker-compose.yml`. +Keep all four in lockstep with `pom.xml`. 
diff --git a/docker/conf/hbase/hbase-env.sh b/docker/conf/hbase/hbase-env.sh new file mode 100644 index 0000000..a5c243b --- /dev/null +++ b/docker/conf/hbase/hbase-env.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +export JAVA_HOME=${JAVA_HOME:-/opt/java/openjdk} +export HBASE_MANAGES_ZK=false +export HBASE_LOG_DIR=/var/log/hbase +export HBASE_PID_DIR=/var/run/hbase + +# Sized to fit the whole stack in ~4 GB of Docker memory. +export HBASE_HEAPSIZE=1G +export HBASE_OFFHEAPSIZE=256m + +# Strip JDK11-specific GC flags HBase ships with; we run on JDK8. +export HBASE_OPTS="-XX:+UseG1GC -XX:+UnlockExperimentalVMOptions" +export HBASE_MASTER_OPTS="${HBASE_OPTS} -Xms256m" +export HBASE_REGIONSERVER_OPTS="${HBASE_OPTS} -Xms512m" + +unset HBASE_JSHELL_ARGS diff --git a/docker/conf/hbase/hbase-site.xml b/docker/conf/hbase/hbase-site.xml new file mode 100644 index 0000000..a47b462 --- /dev/null +++ b/docker/conf/hbase/hbase-site.xml @@ -0,0 +1,58 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<configuration> + <property> + <name>hbase.rootdir</name> + <value>hdfs://namenode:9000/hbase</value> + </property> + <property> + <name>hbase.cluster.distributed</name> + <value>true</value> + </property> + <property> + <name>hbase.zookeeper.quorum</name> + <value>zookeeper</value> + </property> + <property> + <name>hbase.zookeeper.property.clientPort</name> + <value>2181</value> + </property> + <property> + <name>hbase.unsafe.stream.capability.enforce</name> + <value>false</value> + </property> + <property> + <name>hbase.wal.provider</name> + <value>filesystem</value> + </property> + + <!-- Phoenix 5.x required configuration. 
--> + <property> + <name>hbase.coprocessor.master.classes</name> + <value>org.apache.phoenix.coprocessor.PhoenixMasterObserver</value> + </property> + <property> + <name>hbase.coprocessor.regionserver.classes</name> + <value>org.apache.phoenix.coprocessor.PhoenixRegionServerEndpoint</value> + </property> + <property> + <name>hbase.regionserver.wal.codec</name> + <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value> + </property> + <property> + <name>hbase.region.server.rpc.scheduler.factory.class</name> + <value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value> + </property> + <property> + <name>hbase.rpc.controllerfactory.class</name> + <value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value> + </property> + <property> + <name>phoenix.task.handling.interval.ms</name> + <value>10</value> + </property> + <property> + <name>phoenix.task.handling.initial.delay.ms</name> + <value>1</value> + </property> +</configuration> diff --git a/docker/conf/phoenix-adapters/hbase-site.xml b/docker/conf/phoenix-adapters/hbase-site.xml new file mode 100644 index 0000000..fd993b9 --- /dev/null +++ b/docker/conf/phoenix-adapters/hbase-site.xml @@ -0,0 +1,21 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- Must match the WAL codec and RPC controller settings on the server. 
--> +<configuration> + <property> + <name>hbase.regionserver.wal.codec</name> + <value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value> + </property> + <property> + <name>hbase.rpc.controllerfactory.class</name> + <value>org.apache.hadoop.hbase.ipc.controller.ClientRpcControllerFactory</value> + </property> + <property> + <name>phoenix.task.handling.interval.ms</name> + <value>10</value> + </property> + <property> + <name>phoenix.task.handling.initial.delay.ms</name> + <value>1</value> + </property> +</configuration> diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..794cb6e --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,186 @@ +# Bring up from the project root: +# docker compose -f docker/docker-compose.yml up --build +# +# Cluster only (no REST): +# docker compose -f docker/docker-compose.yml up --build \ +# zookeeper namenode datanode hbase-master hbase-regionserver + +name: phoenix-adapters + +services: + + zookeeper: + image: zookeeper:3.8.4 + container_name: phx-zookeeper + hostname: zookeeper + environment: + ZOO_4LW_COMMANDS_WHITELIST: "srvr,ruok,mntr,conf" + ZOO_ADMINSERVER_ENABLED: "false" + ZOO_AUTOPURGE_PURGEINTERVAL: 24 + ZOO_AUTOPURGE_SNAPRETAINCOUNT: 3 + ports: + # Host port shifted off the default; 2181 is often busy on dev machines. + - "12181:2181" + volumes: + - zookeeper-data:/data + - zookeeper-datalog:/datalog + healthcheck: + test: ["CMD-SHELL", "echo ruok | nc -w 2 localhost 2181 | grep -q imok"] + interval: 5s + timeout: 5s + retries: 20 + networks: [phoenix-net] + + # apache/hadoop:3.3.6 is amd64-only; on Apple Silicon Docker emulates + # via Rosetta/qemu (slower but functional). + namenode: + image: apache/hadoop:3.3.6 + platform: linux/amd64 + container_name: phx-namenode + hostname: namenode + environment: + # Triggers a first-time `hdfs namenode -format` when this dir is empty. 
+ ENSURE_NAMENODE_DIR: /data/namenode + # The apache/hadoop image templates *-SITE.XML files from these env vars. + CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000" + HDFS-SITE.XML_dfs.replication: "1" + HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode" + HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode" + HDFS-SITE.XML_dfs.permissions.enabled: "false" + HDFS-SITE.XML_dfs.namenode.datanode.registration.ip-hostname-check: "false" + HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true" + HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true" + command: ["hdfs", "namenode"] + ports: + - "9870:9870" + # Host port shifted off 9000 (macOS AirPlay et al). + - "19000:9000" + volumes: + - namenode-data:/data + healthcheck: + # Hadoop binds to the hostname, not localhost. + test: ["CMD-SHELL", "nc -z namenode 9000 || exit 1"] + interval: 5s + timeout: 5s + retries: 30 + networks: [phoenix-net] + + datanode: + image: apache/hadoop:3.3.6 + platform: linux/amd64 + container_name: phx-datanode + hostname: datanode + depends_on: + namenode: + condition: service_healthy + environment: + WAITFOR: namenode:9000 + CORE-SITE.XML_fs.defaultFS: "hdfs://namenode:9000" + HDFS-SITE.XML_dfs.replication: "1" + HDFS-SITE.XML_dfs.namenode.name.dir: "file:///data/namenode" + HDFS-SITE.XML_dfs.datanode.data.dir: "file:///data/datanode" + HDFS-SITE.XML_dfs.permissions.enabled: "false" + HDFS-SITE.XML_dfs.client.use.datanode.hostname: "true" + HDFS-SITE.XML_dfs.datanode.use.datanode.hostname: "true" + command: ["hdfs", "datanode"] + ports: + - "9864:9864" + volumes: + - datanode-data:/data + healthcheck: + test: ["CMD-SHELL", "nc -z datanode 9866 || exit 1"] + interval: 5s + timeout: 5s + retries: 30 + networks: [phoenix-net] + + hbase-master: + image: phoenix-adapters/hbase-phoenix:latest + build: + context: . 
+ dockerfile: Dockerfile.hbase-phoenix + container_name: phx-hbase-master + hostname: hbase-master + command: ["hbase-master"] + depends_on: + zookeeper: + condition: service_healthy + namenode: + condition: service_healthy + datanode: + condition: service_started + ports: + - "16000:16000" + - "16010:16010" + healthcheck: + test: ["CMD-SHELL", "nc -z hbase-master 16000 || exit 1"] + interval: 10s + timeout: 5s + retries: 30 + networks: [phoenix-net] + + hbase-regionserver: + image: phoenix-adapters/hbase-phoenix:latest + build: + context: . + dockerfile: Dockerfile.hbase-phoenix + container_name: phx-hbase-regionserver + hostname: hbase-regionserver + command: ["hbase-regionserver"] + depends_on: + hbase-master: + condition: service_healthy + ports: + - "16020:16020" + - "16030:16030" + healthcheck: + test: ["CMD-SHELL", "nc -z hbase-regionserver 16020 || exit 1"] + interval: 5s + timeout: 5s + retries: 30 + networks: [phoenix-net] + + phoenix-adapters-rest: + image: phoenix-adapters/rest:latest + build: + context: .. + dockerfile: docker/Dockerfile.phoenix-adapters + container_name: phx-adapters-rest + hostname: phoenix-adapters-rest + depends_on: + hbase-master: + condition: service_healthy + hbase-regionserver: + condition: service_healthy + environment: + - ZOO_KEEPER_QUORUM=zookeeper:2181 + - PHOENIX_REST_PORT=8842 + - HBASE_MASTER_HOST=hbase-master + - HBASE_MASTER_PORT=16000 + ports: + - "8842:8842" + # Probes the real API: only "healthy" once Phoenix has bootstrapped + # SYSTEM.* tables and Jetty is accepting POSTs. 
+ healthcheck: + test: + - "CMD-SHELL" + - >- + curl -fs -m 3 -X POST http://localhost:8842/ + -H 'Content-Type: application/x-amz-json-1.0' + -H 'X-Amz-Target: DynamoDB_20120810.ListTables' + -d '{}' || exit 1 + interval: 5s + timeout: 5s + retries: 60 + start_period: 30s + networks: [phoenix-net] + +volumes: + zookeeper-data: + zookeeper-datalog: + namenode-data: + datanode-data: + +networks: + phoenix-net: + driver: bridge diff --git a/docker/scripts/hbase-entrypoint.sh b/docker/scripts/hbase-entrypoint.sh new file mode 100644 index 0000000..ed21d07 --- /dev/null +++ b/docker/scripts/hbase-entrypoint.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# +# Usage: entrypoint.sh <role> +# role := hbase-master | hbase-regionserver | bash | help +# +set -euo pipefail + +ROLE="${1:-help}" + +log() { echo "[hbase-entrypoint][$(date -u +%H:%M:%S)] $*"; } +fail() { log "ERROR: $*"; exit 1; } + +wait_for() { + local host="$1" port="$2" + log "Waiting for ${host}:${port} ..." + until nc -z "${host}" "${port}" 2>/dev/null; do + sleep 2 + done + log "${host}:${port} is reachable." +} + +case "${ROLE}" in + hbase-master) + wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}" + wait_for "${NAMENODE_HOST:-namenode}" "${NAMENODE_PORT:-9000}" + exec "${HBASE_HOME}/bin/hbase" master start + ;; + + hbase-regionserver) + wait_for "${ZOOKEEPER_HOST:-zookeeper}" "${ZOOKEEPER_PORT:-2181}" + wait_for "${HMASTER_HOST:-hbase-master}" "${HMASTER_PORT:-16000}" + exec "${HBASE_HOME}/bin/hbase" regionserver start + ;; + + bash|shell) + exec /bin/bash + ;; + + help|*) + cat <<EOF +Usage: docker run ... phoenix-adapters/hbase-phoenix:latest <role> + +Roles: + hbase-master Run the HBase Master. + hbase-regionserver Run an HBase RegionServer. + bash Drop into a shell inside the image. 
+ +Versions: + HBase ${HBASE_VERSION}-${HBASE_FLAVOR} + Phoenix ${PHOENIX_VERSION} (phoenix-hbase-${PHOENIX_HBASE_LINE}) +EOF + [[ "${ROLE}" == "help" ]] && exit 0 + fail "Unknown role: ${ROLE}" + ;; +esac diff --git a/docker/scripts/phoenix-adapters-entrypoint.sh b/docker/scripts/phoenix-adapters-entrypoint.sh new file mode 100644 index 0000000..1c4685d --- /dev/null +++ b/docker/scripts/phoenix-adapters-entrypoint.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail + +log() { echo "[phoenix-adapters][$(date -u +%H:%M:%S)] $*"; } + +wait_for() { + local host="$1" port="$2" + log "Waiting for ${host}:${port} ..." + until nc -z "${host}" "${port}" 2>/dev/null; do + sleep 2 + done + log "${host}:${port} is reachable." +} + +zk_quorum="${ZOO_KEEPER_QUORUM:-zookeeper:2181}" +zk_host="${zk_quorum%%:*}" +zk_port="${zk_quorum##*:}" +[[ "${zk_host}" == "${zk_port}" ]] && zk_port=2181 + +wait_for "${zk_host}" "${zk_port}" +wait_for "${HBASE_MASTER_HOST:-hbase-master}" "${HBASE_MASTER_PORT:-16000}" + +# Give the master a moment to finish initialising hbase:meta before the +# first Phoenix connection bootstraps SYSTEM.* tables. 
+sleep "${PHOENIX_BOOTSTRAP_SLEEP_SECONDS:-5}" + +log "Starting Phoenix Adapters REST on :${PHOENIX_REST_PORT} (ZK=${zk_quorum})" + +CLASSPATH="${PHOENIX_ADAPTERS_CONF_DIR}:${PHOENIX_ADAPTERS_HOME}/lib/*" + +exec "${JAVA_HOME}/bin/java" \ + -Dproc_rest \ + -XX:+UseG1GC \ + -XX:OnOutOfMemoryError="kill -9 %p" \ + -XX:+HeapDumpOnOutOfMemoryError \ + -XX:HeapDumpPath="${PHOENIX_ADAPTERS_LOG_DIR}" \ + -Dphoenix.adapters.log.dir="${PHOENIX_ADAPTERS_LOG_DIR}" \ + -Dlog4j2.configurationFile="file:${PHOENIX_ADAPTERS_CONF_DIR}/log4j2.properties" \ + -cp "${CLASSPATH}" \ + org.apache.phoenix.ddb.rest.RESTServer \ + start \ + -p "${PHOENIX_REST_PORT}" \ + -z "${zk_quorum}" diff --git a/docker/scripts/smoke.sh b/docker/scripts/smoke.sh new file mode 100755 index 0000000..051839f --- /dev/null +++ b/docker/scripts/smoke.sh @@ -0,0 +1,311 @@ +#!/usr/bin/env bash +# +# Phoenix Adapters DynamoDB validation suite. +# +# Hits every supported API against the dockerized REST server and asserts +# the expected behaviour. Prints each request, response, and assertion in +# a readable format. Exits 0 on full pass; exits non-zero on the first +# failed assertion (and dumps the offending response). +# +# Usage: docker/scripts/smoke.sh [label] +# +# Requires the cluster to already be up (see docker/README.md). +# Requires: jq, curl. +# +set -euo pipefail + +URL="${PHX_URL:-http://localhost:8842}" +LABEL="${1:-}" +TBL="Smoke${LABEL}" +CT='Content-Type: application/x-amz-json-1.0' +TARGET='X-Amz-Target: DynamoDB_20120810' +TOTAL=18 + +if ! 
command -v jq >/dev/null 2>&1; then + echo "smoke.sh: jq is required but not on PATH" >&2 + exit 2 +fi + +# ─── ANSI helpers ──────────────────────────────────────────────────────────── +B='\033[1m' # bold +DIM='\033[2m' # dim +CYAN='\033[1;36m' +GREEN='\033[32m' +RED='\033[31m' +RESET='\033[0m' +RULE='─────────────────────────────────────────────────────────────' +BAR='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━' + +STEP=0 +PASS=0 + +banner() { + printf "\n${CYAN}%s${RESET}\n" "$BAR" + printf "${CYAN} %s${RESET}\n" "$1" + while [[ $# -gt 1 ]]; do shift; printf "${CYAN} %s${RESET}\n" "$1"; done + printf "${CYAN}%s${RESET}\n" "$BAR" +} + +step() { + STEP=$((STEP + 1)) + printf "\n${CYAN}[%2d/%2d]${RESET} ${B}%s${RESET}\n" "$STEP" "$TOTAL" "$1" + printf "${DIM}%s${RESET}\n" "$RULE" +} + +show_json() { + local label="$1" body="$2" + printf " ${DIM}%s:${RESET}\n" "$label" + if printf '%s' "$body" | jq . >/dev/null 2>&1; then + printf '%s' "$body" | jq . | sed 's/^/ /' + else + printf " %s\n" "$body" + fi +} + +LAST_RESP="" + +# Prints request + response visually and stashes the raw JSON in LAST_RESP. 
# ddb ACTION BODY
#   ACTION  DynamoDB action name appended to the X-Amz-Target header
#           (e.g. ListTables, PutItem).
#   BODY    JSON request payload.
# Echoes the request and the response through show_json and leaves the raw
# response body in LAST_RESP for the assertions that follow.
# The per-request time limit defaults to 60s and can be overridden with the
# PHX_CURL_MAX_TIME environment variable.
ddb() {
  local action="$1" body="$2"
  show_json "request " "$body"
  # Bound every call with --max-time so a wedged server cannot hang the suite,
  # and report transport failures explicitly: under `set -e` a failing command
  # substitution would otherwise abort the script without naming the action.
  if ! LAST_RESP=$(curl -sS -m "${PHX_CURL_MAX_TIME:-60}" -X POST "$URL/" \
      -H "$CT" -H "$TARGET.$action" -d "$body"); then
    printf " ${RED}✗${RESET} %s: request to %s failed\n" "$action" "$URL" >&2
    exit 1
  fi
  show_json "response" "$LAST_RESP"
}

# assert_eq LABEL ACTUAL EXPECTED
# Passes (and increments PASS) when ACTUAL is string-equal to EXPECTED;
# otherwise prints the mismatch to stderr and exits 1.
assert_eq() {
  local label="$1" actual="$2" expected="$3"
  if [[ "$actual" == "$expected" ]]; then
    printf " ${GREEN}✓${RESET} %s ${B}==${RESET} %s\n" "$label" "$expected"
    PASS=$((PASS + 1))
  else
    printf " ${RED}✗${RESET} %s ${B}expected${RESET} %s, ${B}got${RESET} %s\n" \
      "$label" "$expected" "$actual" >&2
    exit 1
  fi
}

# assert_nonempty LABEL VALUE
# Passes (and increments PASS) when VALUE is neither empty nor the literal
# string "null" (what `jq -r` prints for a missing field); otherwise prints
# to stderr and exits 1.
assert_nonempty() {
  local label="$1" value="$2"
  if [[ -n "$value" && "$value" != "null" ]]; then
    printf " ${GREEN}✓${RESET} %s present (%s)\n" "$label" "$value"
    PASS=$((PASS + 1))
  else
    printf " ${RED}✗${RESET} %s missing\n" "$label" >&2
    exit 1
  fi
}

# assert_ge LABEL ACTUAL THRESHOLD
# Passes (and increments PASS) when the integer ACTUAL is >= THRESHOLD;
# otherwise prints to stderr and exits 1.
assert_ge() {
  local label="$1" actual="$2" threshold="$3"
  # `[[ -ge ]]` evaluates its operands arithmetically, so a non-numeric value
  # such as "null" would be looked up as a variable name (and trip `set -u`)
  # instead of being reported as a failed assertion. Reject it up front.
  if ! [[ "$actual" =~ ^-?[0-9]+$ ]]; then
    printf " ${RED}✗${RESET} %s expected >= %s, got non-integer '%s'\n" "$label" "$threshold" "$actual" >&2
    exit 1
  fi
  if [[ "$actual" -ge "$threshold" ]]; then
    printf " ${GREEN}✓${RESET} %s ${B}>=${RESET} %s (got %s)\n" "$label" "$threshold" "$actual"
    PASS=$((PASS + 1))
  else
    printf " ${RED}✗${RESET} %s expected >= %s, got %s\n" "$label" "$threshold" "$actual" >&2
    exit 1
  fi
}

banner "Phoenix Adapters DynamoDB Validation Suite" \
  "Endpoint : $URL" \
  "Table : $TBL"

# ─── Confirm the REST server is up before exercising the API ────────────────
# When the stack is launched with `docker compose up --wait` the
# phoenix-adapters-rest healthcheck has already ensured readiness; this
# check returns almost immediately in that case. Otherwise we probe
# ListTables until it responds (cold-start bootstrap takes ~30-60s).
TIMEOUT=180
SPIN=( '⠋' '⠙' '⠹' '⠸' '⠼' '⠴' '⠦' '⠧' '⠇' '⠏' )
ready=false
printf "\n"
# Bound the wait by wall-clock time rather than by loop iterations: a failed
# probe can itself take up to 3s (curl -m 3) on top of the 1s sleep, so an
# iteration counter would let the wait run well past TIMEOUT seconds and
# misreport the elapsed time.
wait_start=$SECONDS
tick=0
while :; do
  tick=$((tick + 1))
  if curl -fs -m 3 -X POST "$URL/" \
      -H "$CT" -H "$TARGET.ListTables" -d '{}' >/dev/null 2>&1; then
    printf "\r${GREEN}✓${RESET} REST server is ready at %s (verified in %ds) \n" "$URL" "$((SECONDS - wait_start))"
    ready=true
    break
  fi
  waited=$((SECONDS - wait_start))
  if [[ $waited -ge $TIMEOUT ]]; then
    break
  fi
  printf "\r${DIM}%s${RESET} Confirming REST server is ready at %s ${DIM}(%ds elapsed)${RESET}" \
    "${SPIN[$((tick % ${#SPIN[@]}))]}" "$URL" "$waited"
  sleep 1
done
if ! $ready; then
  printf "\n${RED}✗ REST server did not become ready within %ds at %s${RESET}\n" "$TIMEOUT" "$URL" >&2
  printf "${DIM}Last 30 lines of phx-adapters-rest:${RESET}\n" >&2
  # Best effort: the container may not exist when PHX_URL points elsewhere.
  docker logs phx-adapters-rest 2>&1 | tail -30 >&2 || true
  exit 1
fi

###############################################################################
# CRUD
###############################################################################

step "ListTables (baseline)"
ddb ListTables '{}'

step "CreateTable (streams enabled, NEW_AND_OLD_IMAGES)"
ddb CreateTable "$(cat <<EOF
{
  "TableName": "$TBL",
  "AttributeDefinitions": [{"AttributeName":"id","AttributeType":"S"}],
  "KeySchema": [{"AttributeName":"id","KeyType":"HASH"}],
  "BillingMode": "PAY_PER_REQUEST",
  "StreamSpecification": {"StreamEnabled": true, "StreamViewType": "NEW_AND_OLD_IMAGES"}
}
EOF
)"
assert_eq "TableStatus" "$(jq -r '.TableDescription.TableStatus' <<<"$LAST_RESP")" "ACTIVE"

step "DescribeTable"
ddb DescribeTable "{\"TableName\":\"$TBL\"}"
assert_eq "StreamSpecification.StreamEnabled" "$(jq -r '.Table.StreamSpecification.StreamEnabled' <<<"$LAST_RESP")" "true"
assert_eq "StreamSpecification.StreamViewType" "$(jq -r '.Table.StreamSpecification.StreamViewType' <<<"$LAST_RESP")" "NEW_AND_OLD_IMAGES"
assert_nonempty "LatestStreamArn" "$(jq -r '.Table.LatestStreamArn // empty' <<<"$LAST_RESP")"

step "PutItem id=a (Alice, score=10)"
ddb PutItem "{\"TableName\":\"$TBL\",\"Item\":{\"id\":{\"S\":\"a\"},\"name\":{\"S\":\"Alice\"},\"score\":{\"N\":\"10\"}}}"

step "UpdateItem id=a (SET score=20, bonus=5, ReturnValues=ALL_NEW)"
ddb UpdateItem "$(cat <<EOF
{
  "TableName": "$TBL",
  "Key": {"id": {"S": "a"}},
  "UpdateExpression": "SET score = :s, bonus = :b",
  "ExpressionAttributeValues": {":s": {"N":"20"}, ":b": {"N":"5"}},
  "ReturnValues": "ALL_NEW"
}
EOF
)"
assert_eq "Attributes.score.N" "$(jq -r '.Attributes.score.N' <<<"$LAST_RESP")" "20"
assert_eq "Attributes.bonus.N" "$(jq -r '.Attributes.bonus.N' <<<"$LAST_RESP")" "5"

step "GetItem id=a"
ddb GetItem "{\"TableName\":\"$TBL\",\"Key\":{\"id\":{\"S\":\"a\"}}}"
assert_eq "Item.name.S" "$(jq -r '.Item.name.S' <<<"$LAST_RESP")" "Alice"
assert_eq "Item.score.N" "$(jq -r '.Item.score.N' <<<"$LAST_RESP")" "20"
assert_eq "Item.bonus.N" "$(jq -r '.Item.bonus.N' <<<"$LAST_RESP")" "5"

step "PutItem id=b (Bob, score=7)"
ddb PutItem "{\"TableName\":\"$TBL\",\"Item\":{\"id\":{\"S\":\"b\"},\"name\":{\"S\":\"Bob\"},\"score\":{\"N\":\"7\"}}}"

step "Scan"
ddb Scan "{\"TableName\":\"$TBL\"}"
assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "2"

step "Query id = 'a'"
ddb Query "$(cat <<EOF
{
  "TableName": "$TBL",
  "KeyConditionExpression": "id = :v",
  "ExpressionAttributeValues": {":v": {"S": "a"}}
}
EOF
)"
assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "1"
assert_eq "Items[0].name.S" "$(jq -r '.Items[0].name.S' <<<"$LAST_RESP")" "Alice"

step "DeleteItem id=b"
ddb DeleteItem "{\"TableName\":\"$TBL\",\"Key\":{\"id\":{\"S\":\"b\"}}}"

step "Scan (after delete)"
ddb Scan "{\"TableName\":\"$TBL\"}"
assert_eq "Count" "$(jq -r '.Count' <<<"$LAST_RESP")" "1"

step "BatchWriteItem (put id=c, id=d; delete id=a)"
ddb BatchWriteItem "$(cat <<EOF
{
  "RequestItems": {
    "$TBL": [
      {"PutRequest": {"Item": {"id": {"S": "c"}, "name": {"S": "Carol"}}}},
      {"PutRequest": {"Item": {"id": {"S": "d"}, "name": {"S": "Dan"}}}},
      {"DeleteRequest": {"Key": {"id": {"S": "a"}}}}
    ]
  }
}
EOF
)"
assert_eq "UnprocessedItems (size)" "$(jq -r '.UnprocessedItems // {} | length' <<<"$LAST_RESP")" "0"

step "Scan (drain all pages after batch)"
# Follow LastEvaluatedKey for at most 10 pages, summing Count across pages.
total=0; iter_key=""
for page in $(seq 1 10); do
  if [[ -z "$iter_key" ]]; then
    ddb Scan "{\"TableName\":\"$TBL\"}"
  else
    ddb Scan "{\"TableName\":\"$TBL\",\"ExclusiveStartKey\":$iter_key}"
  fi
  n=$(jq -r '.Count // 0' <<<"$LAST_RESP")
  total=$((total + n))
  printf " ${DIM}page %d: %d item(s)${RESET}\n" "$page" "$n"
  iter_key=$(jq -c '.LastEvaluatedKey // empty' <<<"$LAST_RESP")
  if [[ -z "$iter_key" ]]; then
    break
  fi
done
assert_eq "total Items across all pages" "$total" "2"

###############################################################################
# Streams API
###############################################################################

step "ListStreams"
ddb ListStreams "{\"TableName\":\"$TBL\"}"
listed_arn=$(jq -r ".Streams[]? | select(.TableName == \"$TBL\") | .StreamArn" <<<"$LAST_RESP" | head -n1)
assert_nonempty "StreamArn for $TBL" "$listed_arn"

step "DescribeStream (poll until StreamStatus==ENABLED, max 30s)"
# 15 attempts with a 2s pause between them gives the 30s budget in the title.
shard_id=""; status=""
for attempt in $(seq 1 15); do
  ddb DescribeStream "{\"StreamArn\":\"$listed_arn\"}"
  status=$(jq -r '.StreamDescription.StreamStatus // empty' <<<"$LAST_RESP")
  if [[ "$status" == "ENABLED" ]]; then
    shard_id=$(jq -r '.StreamDescription.Shards[0].ShardId // empty' <<<"$LAST_RESP")
    break
  fi
  printf " ${DIM}attempt %d: status=%s${RESET}\n" "$attempt" "$status"
  sleep 2
done
assert_eq "StreamDescription.StreamStatus" "$status" "ENABLED"
assert_nonempty "StreamDescription.Shards[0].ShardId" "$shard_id"

step "GetShardIterator (TRIM_HORIZON)"
ddb GetShardIterator "$(cat <<EOF
{
  "StreamArn": "$listed_arn",
  "ShardId": "$shard_id",
  "ShardIteratorType": "TRIM_HORIZON"
}
EOF
)"
iter=$(jq -r '.ShardIterator // empty' <<<"$LAST_RESP")
assert_nonempty "ShardIterator" "$iter"

step "GetRecords (drain pages until empty)"
# Stop after 10 pages, when the iterator stops advancing or disappears, or
# after the first page that returns no records.
total=0; pages=0; seen_keys=""
while [[ -n "$iter" && "$iter" != "null" && $pages -lt 10 ]]; do
  pages=$((pages + 1))
  ddb GetRecords "{\"ShardIterator\":\"$iter\"}"
  n=$(jq -r '.Records | length' <<<"$LAST_RESP")
  keys=$(jq -r '.Records[]?.dynamodb.Keys.id.S' <<<"$LAST_RESP" | tr '\n' ',' | sed 's/,$//')
  if [[ -n "$keys" ]]; then
    seen_keys="${seen_keys:+$seen_keys,}$keys"
  fi
  total=$((total + n))
  printf " ${DIM}page %d: %d record(s) keys=[%s]${RESET}\n" "$pages" "$n" "$keys"
  next=$(jq -r '.NextShardIterator // empty' <<<"$LAST_RESP")
  if [[ "$next" == "$iter" || -z "$next" || "$next" == "null" ]]; then
    break
  fi
  iter="$next"
  if [[ $n -eq 0 ]]; then
    break
  fi
done
printf " ${DIM}total records: %d keys=[%s]${RESET}\n" "$total" "$seen_keys"
# Seven mutations were issued above: four single-item calls (PutItem-a,
# UpdateItem-a, PutItem-b, DeleteItem-b) and three from the batch (delete-a,
# put-c, put-d). Only the first four are required here.
# NOTE(review): the threshold of 4 is presumably lenient on purpose (batch
# records not guaranteed to be visible yet) — confirm, or raise it to 7.
assert_ge "stream record count" "$total" "4"

step "DeleteTable (cleanup)"
ddb DeleteTable "{\"TableName\":\"$TBL\"}"

###############################################################################
# Summary
###############################################################################

banner "Result: ${PASS} checks PASSED across ${TOTAL} API calls"
