This is an automated email from the ASF dual-hosted git repository.

liuxun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino-playground.git
The following commit(s) were added to refs/heads/main by this push:
     new b16d683  Add the demo about access control (#104)
b16d683 is described below

commit b16d6836c71afcae7589839809d801b68b7041d1
Author: roryqi <ror...@apache.org>
AuthorDate: Wed Nov 20 15:43:17 2024 +0800

    Add the demo about access control (#104)
---
 README.md                                          |  16 +
 docker-compose.yaml                                |  30 +-
 docker-hive-override.yaml                          |  68 ++
 .../ranger-healthcheck.sh                          |  31 +-
 init/gravitino/gravitino.conf                      |   6 +
 init/hive/core-site.xml                            |  46 ++
 init/hive/init.sh                                  |   5 +
 .../gravitino-access-control-example.ipynb         | 754 +++++++++++++++++++++
 init/jupyter/gravitino-spark-trino-example.ipynb   |   1 +
 init/jupyter/init.sh                               |  15 +-
 init/jupyter/jupyter-dependency.sh                 |   1 -
 init/jupyter/ranger-spark-security.xml             |  44 ++
 .../{jupyter-dependency.sh => spark-defaults.conf} |  27 +-
 init/ranger/hdfsDev.json                           |  26 +
 init/ranger/hiveDev.json                           |  24 +
 init/ranger/init.sh                                |  39 ++
 playground.sh                                      |  14 +-
 17 files changed, 1101 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 377b5c4..feab84a 100644
--- a/README.md
+++ b/README.md
@@ -378,6 +378,22 @@ os.environ["OPENAI_API_KEY"] = ""
 os.environ["OPENAI_API_BASE"] = ""
 ```
 
+### Using Gravitino with Ranger authorization
+
+Gravitino can provide access control for Hive tables through the Ranger plugin.
+
+For example, suppose your company has a manager and several staff members. The manager creates
+a Hive catalog and defines different roles, then grants those roles to different staff members.
+
+You can run the following command to start the playground with Ranger enabled:
+
+```shell
+./playground.sh start --enable-ranger
+```
+
+The demo is located in the `jupyter` folder; you can open the `gravitino-access-control-example.ipynb`
+demo in Jupyter Notebook at [http://localhost:18888](http://localhost:18888).
+
 ## ASF Incubator disclaimer
 
 Apache Gravitino is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the proje [...]
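A quick way to confirm the Ranger admin service came up before opening the notebook — a minimal sketch based on the bundled `healthcheck/ranger-healthcheck.sh`, assuming the demo's default `admin`/`rangerR0cks!` credentials and the `6080:6080` port mapping added below:

```shell
#!/bin/bash
# Sketch: check that the Ranger admin REST API answers from the host.
# This mirrors the in-container healthcheck; expect HTTP 200 once Ranger is up.
response=$(curl -s -o /dev/null -w "%{http_code}" \
  -u admin:rangerR0cks! \
  -H "Content-Type: application/json" \
  http://localhost:6080/service/public/v2/api/plugins/info)
echo "Ranger health check: ${response}"
```

The same endpoint is what the compose healthcheck polls inside the container; from the host, `localhost:6080` maps to it.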
diff --git a/docker-compose.yaml b/docker-compose.yaml index 9ce4c9d..be97d8d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -18,7 +18,7 @@ # services: hive: - image: apache/gravitino-playground:hive-2.7.3 + image: apache/gravitino-playground:hive-0.1.14 ports: - "3307:3306" - "19000:9000" @@ -37,6 +37,27 @@ services: retries: 5 start_period: 20s + ranger: + image: apache/gravitino-playground:ranger-0.1.0 + ports: + - "6080:6080" + container_name: playground-ranger + entrypoint: /bin/bash /tmp/ranger/init.sh + volumes: + - ./healthcheck:/tmp/healthcheck + - ./init/ranger:/tmp/ranger + healthcheck: + test: ["CMD", "/tmp/healthcheck/ranger-healthcheck.sh"] + interval: 5s + timeout: 60s + retries: 5 + start_period: 120s + deploy: + resources: + limits: + cpus: "0.5" + memory: 500M + gravitino: image: apache/gravitino:0.7.0-incubating entrypoint: /bin/bash /tmp/gravitino/init.sh @@ -48,7 +69,9 @@ services: - MYSQL_HOST_IP=mysql - HIVE_HOST_IP=hive depends_on: - hive: + ranger : + condition: service_healthy + hive : condition: service_healthy mysql: condition: service_healthy @@ -183,6 +206,3 @@ services: - ./init/grafana/grafana.ini:/etc/grafana/grafana.ini - ./init/grafana/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml - ./init/grafana/dashboards:/etc/grafana/provisioning/dashboards - -volumes: - spark_jars: \ No newline at end of file diff --git a/docker-hive-override.yaml b/docker-hive-override.yaml new file mode 100644 index 0000000..2e2a31b --- /dev/null +++ b/docker-hive-override.yaml @@ -0,0 +1,68 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+services:
+  hive:
+    image: apache/gravitino-ci:hive-0.1.14
+    ports:
+      - "3307:3306"
+      - "19000:9000"
+      - "19083:9083"
+      - "60070:50070"
+    container_name: playground-hive
+    environment:
+      - HADOOP_USER_NAME=root
+      - HIVE_RUNTIME_VERSION=hive3
+      - RANGER_SERVER_URL=http://ranger:6080
+      - RANGER_HIVE_REPOSITORY_NAME=hiveDev
+      - RANGER_HDFS_REPOSITORY_NAME=hdfsDev
+    entrypoint: /bin/bash /tmp/hive/init.sh
+    volumes:
+      - ./init/hive:/tmp/hive
+    healthcheck:
+      test: ["CMD", "/tmp/check-status.sh"]
+      interval: 10s
+      timeout: 60s
+      retries: 5
+      start_period: 20s
+
+  ranger:
+    image: apache/gravitino-ci:ranger-0.1.1
+    ports:
+      - "6080:6080"
+    container_name: playground-ranger
+    entrypoint: /bin/bash /tmp/ranger/init.sh
+    volumes:
+      - ./healthcheck:/tmp/healthcheck
+      - ./init/ranger:/tmp/ranger
+    healthcheck:
+      test: ["CMD", "/tmp/healthcheck/ranger-healthcheck.sh"]
+      interval: 5s
+      timeout: 60s
+      retries: 5
+      start_period: 120s
+    deploy:
+      resources:
+        limits:
+          cpus: "0.5"
+          memory: 500M
+
+  jupyter:
+    environment:
+      - RANGER_ENABLE=true
+
\ No newline at end of file
diff --git a/init/jupyter/jupyter-dependency.sh b/healthcheck/ranger-healthcheck.sh
similarity index 64%
copy from init/jupyter/jupyter-dependency.sh
copy to healthcheck/ranger-healthcheck.sh
index 40cfee0..6843d93 100755
--- a/init/jupyter/jupyter-dependency.sh
+++ b/healthcheck/ranger-healthcheck.sh
@@ -1,5 +1,4 @@
 #!/bin/bash
-
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -18,18 +17,24 @@
 # specific language governing permissions and limitations
 # under the License.
 #
+set -ex
+
+max_attempts=3
+attempt=0
-jupyter_dir="$(dirname "${BASH_SOURCE-$0}")"
-jupyter_dir="$(
-  cd "${jupyter_dir}" >/dev/null
-  pwd
-)"
-. "${jupyter_dir}/../common/common.sh"
+while [ $attempt -lt $max_attempts ]; do
+  response=$(curl -s -o /dev/null -w "%{http_code}" -u admin:rangerR0cks! -H "Content-Type: application/json" -X GET http://127.0.0.1:6080/service/public/v2/api/plugins/info)
+
+  echo "Ranger health check ${response}"
-# Prepare download packages
-if [[ ! -d "${jupyter_dir}/packages" ]]; then
-  mkdir "${jupyter_dir}/packages"
-fi
-ls "${jupyter_dir}/packages/" | xargs -I {} rm "${jupyter_dir}/packages/"{}
-find "${jupyter_dir}/../spark/packages/" | grep jar | xargs -I {} ln {} "${jupyter_dir}/packages/"
+  if [[ ${response} -eq 200 ]]; then
+    exit 0
+  else
+    echo "Attempt $((attempt + 1)) failed..."
+    sleep 1
+  fi
+
+  ((attempt++))
+done
+exit 1
diff --git a/init/gravitino/gravitino.conf b/init/gravitino/gravitino.conf
index d773783..4cafe21 100755
--- a/init/gravitino/gravitino.conf
+++ b/init/gravitino/gravitino.conf
@@ -59,6 +59,12 @@ gravitino.entity.store.relational.jdbcPassword = gravitino
 # The interval in milliseconds to evict the catalog cache
 gravitino.catalog.cache.evictionIntervalMs = 3600000
 
+# THE CONFIGURATION FOR AUTHORIZATION
+# Whether Gravitino enables authorization or not
+gravitino.authorization.enable = true
+# The admins of the Gravitino service; multiple admins are separated by commas.
+gravitino.authorization.serviceAdmins = anonymous
+
 # THE CONFIGURATION FOR AUXILIARY SERVICE
 # Auxiliary service names, separate by ','
 gravitino.auxService.names = iceberg-rest
diff --git a/init/hive/core-site.xml b/init/hive/core-site.xml
new file mode 100644
index 0000000..ded121f
--- /dev/null
+++ b/init/hive/core-site.xml
@@ -0,0 +1,46 @@
+<configuration>
+  <property>
+    <name>fs.defaultFS</name>
+    <value>hdfs://__REPLACE__HOST_NAME:9000</value>
+  </property>
+
+  <property>
+    <name>name</name>
+    <value>Development Cluster</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.hive.hosts</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.hive.groups</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.root.groups</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.root.hosts</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.gravitino.groups</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>hadoop.proxyuser.gravitino.hosts</name>
+    <value>*</value>
+  </property>
+
+  <property>
+    <name>fs.permissions.umask-mode</name>
+    <value>002</value>
+  </property>
+</configuration>
diff --git a/init/hive/init.sh b/init/hive/init.sh
index 8060dfc..9508833 100644
--- a/init/hive/init.sh
+++ b/init/hive/init.sh
@@ -17,11 +17,16 @@
 # under the License.
 #
+# Remove the trailing `tail -f /dev/null` command line from `/usr/local/sbin/start.sh`
 sed -i '$d' /usr/local/sbin/start.sh
 sed -i '$d' /usr/local/sbin/start.sh
+cp /tmp/hive/core-site.xml /tmp/hadoop-conf
 sed -i "s|hdfs://localhost:9000|hdfs://${HIVE_HOST_IP}:9000|g" /usr/local/hive/conf/hive-site.xml
 /bin/bash /usr/local/sbin/start.sh
 hdfs dfs -mkdir -p /user/gravitino
 hdfs dfs -mkdir -p /user/iceberg/warehouse
+useradd -g hdfs lisa
+useradd -g hdfs manager
+useradd -g hdfs anonymous
 hdfs dfs -chmod 777 /user/iceberg/warehouse/
 tail -f /dev/null
diff --git a/init/jupyter/authorization/gravitino-access-control-example.ipynb b/init/jupyter/authorization/gravitino-access-control-example.ipynb
new file mode 100644
index 0000000..b91de0b
--- /dev/null
+++ b/init/jupyter/authorization/gravitino-access-control-example.ipynb
@@ -0,0 +1,754 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "df953e1a-b48a-4eda-b099-c89c9449fdad",
+   "metadata": {},
+   "source": [
+    "## Gravitino access control"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "This demo shows how Gravitino authorizes a Hive catalog through Ranger: when Spark queries the Hive data source, each user operation is checked and either allowed or denied.\n",
\n", + " You can log in to the Apache Ranger admin service to see the permissions.\n", + "\n", + "+ Apache Ranger admin service: http://localhost:6080/, the login user name is `admin` and the password is `rangerR0cks!`.\n", + "+ Apache Gravitino access control document: https://gravitino.apache.org/docs/latest/security/access-control" + ], + "metadata": { + "collapsed": false + }, + "id": "d75740e99c5ed90e" + }, + { + "cell_type": "markdown", + "source": [ + "### Add the manager to the metalake" + ], + "metadata": { + "collapsed": false + }, + "id": "42a9fa39f5d9ef03" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "headers = {\n", + " 'Accept': 'application/vnd.gravitino.v1+json',\n", + " 'Content-Type': 'application/json',\n", + "}\n", + "\n", + "data = {\n", + " \"name\": \"manager\"\n", + "}\n", + "\n", + "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n", + "\n", + "# print the response text (the content of the requested file):\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "b09c15849e20a095" + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "\n", + "### Create a Hive catalog with Ranger authorization" + ], + "metadata": { + "collapsed": false + }, + "id": "cd2dc99e370d1c71" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/catalogs\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + " \"Authorization\": \"Basic bWFuYWdlcjoxMjM=\",\n", + "}\n", + "data = {\n", + " \"name\": \"catalog_hive_ranger\",\n", + " \"type\": \"RELATIONAL\",\n", + " \"provider\": \"hive\",\n", + " \"comment\": \"comment\",\n", + " \"properties\": {\n", + " \"metastore.uris\": \"thrift://hive:9083\",\n", + " \"authorization-provider\": \"ranger\",\n", + " \"authorization.ranger.admin.url\": \"http://ranger:6080\",\n", + " \"authorization.ranger.auth.type\": \"simple\",\n", + " \"authorization.ranger.username\": \"admin\",\n", + " \"authorization.ranger.password\": \"rangerR0cks!\",\n", + " \"authorization.ranger.service.name\": \"hiveDev\"\n", + " }\n", + "}\n", + "\n", + "response = requests.post(url, headers=headers, data=json.dumps(data))\n", + "\n", + "print(response.text)\n" + ], + "metadata": { + "collapsed": false + }, + "id": "c39ff0b34e25c2e4" + }, + { + "cell_type": "markdown", + "id": "6cdaf5b0-18e1-45df-8e8f-13376e41c421", + "metadata": {}, + "source": [ + "### Install PySpark" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "380aa111-a955-4a48-835f-8a5402bf1a4a", + "metadata": {}, + "outputs": [], + "source": [ + "import pyspark\n", + "import os\n", + "from pyspark.sql import SparkSession\n", + "os.environ['HADOOP_USER_NAME']=\"manager\"\n", + "\n", + "spark = SparkSession.builder \\\n", + " .appName(\"PySpark SQL Example\") \\\n", + " .config(\"spark.plugins\", \"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n", + " .config(\"spark.jars\", \"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n", + " /tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar,\\\n", + " /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n", + " 
.config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n", + " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n", + " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n", + " .config(\"spark.locality.wait.node\", \"0\") \\\n", + " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n", + " .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n", + " .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n", + " .enableHiveSupport() \\\n", + " .getOrCreate()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Show databases list under the catalog_hive" + ], + "metadata": { + "collapsed": false + }, + "id": "499295c00807fb0d" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d70e6c04-bb61-4b5b-8525-41a4a5a34b54", + "metadata": {}, + "outputs": [], + "source": [ + "spark.sql(\"USE catalog_hive_ranger\")\n", + "spark.sql(\"SHOW DATABASES\").show()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create database access control" + ], + "metadata": { + "collapsed": false + }, + "id": "e33c1f5bdee0b3b1" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2fae330-e932-4482-9622-bcc454844ff8", + "metadata": {}, + "outputs": [], + "source": [ + "spark.sql(\"CREATE DATABASE IF NOT EXISTS access_control;\")\n", + "spark.sql(\"USE catalog_hive_ranger\")\n", + "spark.sql(\"SHOW DATABASES\").show()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Create table customers" + ], + "metadata": { + "collapsed": false + }, + "id": "f416641cedd311ca" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c34d2bfc-f13a-4dcc-ae40-f2a2874bfffa", + "metadata": {}, + "outputs": [], + "source": [ + "spark.sql(\"USE access_control;\")\n", + "spark.sql(\"CREATE TABLE customers (customer_id int, customer_name string, customer_email string);\")\n", + "spark.sql(\"SHOW TABLES\").show()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Select and insert data for the table" + ], + "metadata": { + "collapsed": false + }, + "id": "68adabbd976e106b" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','r...@123.com');\")\n", + "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','je...@dt.com');\")\n", + "spark.sql(\"SELECT * FROM customers\").show()" + ], + "metadata": { + "collapsed": false + }, + "id": "7113e44ad213ff45" + }, + { + "cell_type": "markdown", + "source": [ + "### You should click the jupyter button to restart the notebook, we will start a new spark context with user lisa" + ], + "metadata": { + "collapsed": false + }, + "id": "ee84f44711c7a939" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import pyspark\n", + "import os\n", + "from pyspark.sql import SparkSession\n", + "os.environ['HADOOP_USER_NAME']=\"lisa\"\n", + "\n", + "spark = SparkSession.builder \\\n", + " .appName(\"PySpark SQL Example\") \\\n", + " .config(\"spark.plugins\", 
\"org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin\") \\\n", + " .config(\"spark.jars\", \"/tmp/gravitino/packages/iceberg-spark-runtime-3.4_2.12-1.5.2.jar,\\\n", + " /tmp/gravitino/packages/gravitino-spark-connector-runtime-3.4_2.12-0.7.0-incubating.jar,\\\n", + " /tmp/gravitino/packages/kyuubi-spark-authz-shaded_2.12-1.9.2.jar\") \\\n", + " .config(\"spark.sql.gravitino.uri\", \"http://gravitino:8090\") \\\n", + " .config(\"spark.sql.gravitino.metalake\", \"metalake_demo\") \\\n", + " .config(\"spark.sql.gravitino.enableIcebergSupport\", \"true\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest\", \"org.apache.iceberg.spark.SparkCatalog\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest.type\", \"rest\") \\\n", + " .config(\"spark.sql.catalog.catalog_rest.uri\", \"http://gravitino:9001/iceberg/\") \\\n", + " .config(\"spark.locality.wait.node\", \"0\") \\\n", + " .config(\"spark.driver.extraClassPath\", \"/tmp/gravitino\") \\\n", + " .config(\"spark.sql.extensions\", \"org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension\") \\\n", + " .config(\"spark.sql.warehouse.dir\", \"hdfs://hive:9000/user/hive/warehouse\") \\\n", + " .enableHiveSupport() \\\n", + " .getOrCreate()" + ], + "metadata": { + "collapsed": false + }, + "id": "81f1b47f026aa59d" + }, + { + "cell_type": "markdown", + "source": [ + "#### Add Spark execute user `lisa` into Gravitino\n", + "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#add-a-user" + ], + "metadata": { + "collapsed": false + }, + "id": "8530bac14f93ef26" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "headers = {\n", + " 'Accept': 'application/vnd.gravitino.v1+json',\n", + " 'Content-Type': 'application/json',\n", + "}\n", + "\n", + "data = {\n", + " \"name\": \"lisa\"\n", + "}\n", + "\n", + "response = requests.post('http://gravitino:8090/api/metalakes/metalake_demo/users', headers=headers, data=json.dumps(data))\n", + "\n", + "# print the response text (the content of the requested file):\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "25ca43caa7aa5a30" + }, + { + "cell_type": "markdown", + "source": [ + "### Create a developer role" + ], + "metadata": { + "collapsed": false + }, + "id": "23b6521d01362ba2" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"name\": \"developer\",\n", + " \"properties\": {\"k1\": \"v1\"},\n", + " \"securableObjects\": [\n", + " {\n", + " \"fullName\": \"catalog_hive_ranger\",\n", + " \"type\": \"CATALOG\",\n", + " \"privileges\": [\n", + " {\n", + " \"name\": \"USE_CATALOG\",\n", + " \"condition\": \"ALLOW\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"fullName\": \"catalog_hive_ranger.access_control\",\n", + " \"type\": \"SCHEMA\",\n", + " \"privileges\": [\n", + " {\n", + " \"name\": \"USE_SCHEMA\",\n", + " \"condition\": \"ALLOW\"\n", + " },\n", + " {\n", + " \"name\": \"CREATE_TABLE\",\n", + " \"condition\": \"ALLOW\"\n", + " },\n", + " {\n", + " \"name\": \"MODIFY_TABLE\",\n", + " \"condition\": \"ALLOW\"\n", + " },\n", + " {\n", + " \"name\": \"SELECT_TABLE\",\n", + " \"condition\": \"ALLOW\"\n", + " }\n", + 
" ]\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = requests.post(url, headers=headers, data=json.dumps(data))\n", + "\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "60527c15252f9bc8" + }, + { + "cell_type": "markdown", + "source": [ + "### Grant role to Spark execute user lisa\n", + "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#grant-roles-to-a-user" + ], + "metadata": { + "collapsed": false + }, + "id": "ad03479cbb062c80" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"roleNames\": [\"developer\"]\n", + "}\n", + "\n", + "response = requests.put(url, headers=headers, data=json.dumps(data))\n", + "\n", + "# print status code and response text\n", + "print(response.status_code)\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "822c265e4981cf8d" + }, + { + "cell_type": "markdown", + "source": [ + "### Select and insert data for the table" + ], + "metadata": { + "collapsed": false + }, + "id": "342b2b8b96235f06" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "spark.sql(\"USE catalog_hive_ranger;\")\n", + "spark.sql(\"USE access_control;\")\n", + "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','r...@123.com');\")\n", + "spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','je...@dt.com');\")\n", + "spark.sql(\"SELECT * FROM customers\").show()" + ], + "metadata": { + "collapsed": false + }, + "id": "968b1f56d7cf3167" + }, + { + "cell_type": "markdown", + "source": [ + "### Create another table" + ], + "metadata": { + "collapsed": false + }, + "id": "e633d159e006740d" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "spark.sql(\"CREATE TABLE another_customers (customer_id int, customer_name string, customer_email string);\")\n", + "spark.sql(\"SHOW TABLES;\").show()" + ], + "metadata": { + "collapsed": false + }, + "id": "e2b59346fbf058ce" + }, + { + "cell_type": "markdown", + "source": [ + "### Succeed to drop his table" + ], + "metadata": { + "collapsed": false + }, + "id": "1e5624cfef2048fd" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "spark.sql(\"DROP TABLE another_customers;\")\n", + "spark.sql(\"SHOW TABLES;\").show()" + ], + "metadata": { + "collapsed": false + }, + "id": "b62051095545975" + }, + { + "cell_type": "markdown", + "source": [ + "### Fail to drop others' table" + ], + "metadata": { + "collapsed": false + }, + "id": "161c5822bf5b3f67" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from py4j.protocol import Py4JJavaError\n", + "\n", + "try:\n", + " spark.sql(\"DROP TABLE customers;\")\n", + "except Py4JJavaError as e:\n", + " print(\"An error occurred: \", e.java_exception)" + ], + "metadata": { + "collapsed": false + }, + "id": "3d8c1a7563009cdc" + }, + { + "cell_type": "markdown", + "source": [ + "## Change another role for the user" + ], + "metadata": { + "collapsed": false + }, + "id": "1a51eaa11e56d2f7" + }, + { + "cell_type": 
"markdown", + "source": [ + "### Revoke role from Spark execute user lisa\n", + "+ https://gravitino.apache.org/docs/0.6.0-incubating/security/access-control#revoke-roles-from-a-user" + ], + "metadata": { + "collapsed": false + }, + "id": "5a6bd716b808b53d" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/revoke\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"roleNames\": [\"developer\"]\n", + "}\n", + "\n", + "response = requests.put(url, headers=headers, data=json.dumps(data))\n", + "\n", + "# print status code and response text\n", + "print(response.status_code)\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "21229241aa84650a" + }, + { + "cell_type": "markdown", + "source": [ + "### Create a analyst role" + ], + "metadata": { + "collapsed": false + }, + "id": "947303c40f7e8835" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/roles\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"name\": \"analyst\",\n", + " \"properties\": {\"k1\": \"v1\"},\n", + " \"securableObjects\": [\n", + " {\n", + " \"fullName\": \"catalog_hive_ranger\",\n", + " \"type\": \"CATALOG\",\n", + " \"privileges\": [\n", + " {\n", + " \"name\": \"USE_CATALOG\",\n", + " \"condition\": \"ALLOW\"\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"fullName\": \"catalog_hive_ranger.access_control\",\n", + " \"type\": \"SCHEMA\",\n", + " \"privileges\": [\n", + " {\n", + " \"name\": \"USE_SCHEMA\",\n", + " \"condition\": \"ALLOW\"\n", + " },\n", + " {\n", + " \"name\": \"SELECT_TABLE\",\n", + " \"condition\": \"ALLOW\"\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + "}\n", + "\n", + "response = requests.post(url, headers=headers, data=json.dumps(data))\n", + "\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "7117a353c5843a1e" + }, + { + "cell_type": "markdown", + "source": [ + "### Grant a analyst to the user" + ], + "metadata": { + "collapsed": false + }, + "id": "78a722b6539a73ba" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "url = \"http://gravitino:8090/api/metalakes/metalake_demo/permissions/users/lisa/grant\"\n", + "headers = {\n", + " \"Accept\": \"application/vnd.gravitino.v1+json\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"roleNames\": [\"analyst\"]\n", + "}\n", + "\n", + "response = requests.put(url, headers=headers, data=json.dumps(data))\n", + "\n", + "# print status code and response text\n", + "print(response.status_code)\n", + "print(response.text)" + ], + "metadata": { + "collapsed": false + }, + "id": "eab62ec65b36dbc6" + }, + { + "cell_type": "markdown", + "source": [ + "### Succeed to select data from the table" + ], + "metadata": { + "collapsed": false + }, + "id": "251a5c4001b7ecb3" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "spark.sql(\"SELECT * FROM customers\").show()" + ], + "metadata": { + 
"collapsed": false + }, + "id": "9cfb4a73cd36e529" + }, + { + "cell_type": "markdown", + "source": [ + "### Fail to insert the data to the table" + ], + "metadata": { + "collapsed": false + }, + "id": "450de05e4252f16" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from py4j.protocol import Py4JJavaError\n", + "\n", + "try:\n", + " spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (11,'Rory Brown','r...@123.com');\")\n", + " spark.sql(\"INSERT INTO customers (customer_id, customer_name, customer_email) VALUES (12,'Jerry Washington','je...@dt.com');\")\n", + "except Py4JJavaError as e:\n", + " print(\"An error occurred: \", e.java_exception)" + ], + "metadata": { + "collapsed": false + }, + "id": "de7d35aa98cb3001" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/init/jupyter/gravitino-spark-trino-example.ipynb b/init/jupyter/gravitino-spark-trino-example.ipynb index b68128d..ac88c49 100644 --- a/init/jupyter/gravitino-spark-trino-example.ipynb +++ b/init/jupyter/gravitino-spark-trino-example.ipynb @@ -24,6 +24,7 @@ "trino_host_ip = os.getenv('TRINO_HOST_IP')\n", "spark_host_ip = os.getenv('SPARK_HOST_IP')\n", "spark_home = os.getenv('SPARK_HOME')\n", + "os.environ['HADOOP_USER_NAME']=\"anonymous\"\n", "\n", "spark = SparkSession.builder \\\n", " .appName(\"PySpark SQL Example\") \\\n", diff --git a/init/jupyter/init.sh b/init/jupyter/init.sh index 45e0def..d419a5e 100644 --- a/init/jupyter/init.sh +++ b/init/jupyter/init.sh @@ -16,16 +16,11 @@ # specific language governing permissions and limitations # under the License. # -cp -r /tmp/gravitino/*.ipynb /home/jovyan -export HADOOP_USER_NAME=root -# This needs to be downloaded as root user -wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/1.5.2/iceberg-spark-runtime-3.4_2.12-1.5.2.jar -O $SPARK_HOME/jars/iceberg-spark-runtime-3.4_2.12-1.5.2.jar -wget https://repo1.maven.org/maven2/org/apache/gravitino/gravitino-spark-connector-runtime-3.4_2.12/0.6.0-incubating/gravitino-spark-connector-runtime-3.4_2.12-0.6.0-incubating.jar -O $SPARK_HOME/jars/gravitino-spark-connector-runtime-3.4_2.12-0.6.0-incubating.jar - -# in pyspark-notebook, SPARK_HOME is at /usr/local/spark, we need to link it back to /opt/spark -ln -s $SPARK_HOME /opt/spark - -su - jovyan +if [ -z "$RANGER_ENABLE" ]; then + cp -r /tmp/gravitino/*.ipynb /home/jovyan +else + cp -r /tmp/gravitino/authorization/*.ipynb /home/jovyan +fi start-notebook.sh --NotebookApp.token='' diff --git a/init/jupyter/jupyter-dependency.sh b/init/jupyter/jupyter-dependency.sh index 40cfee0..3af9000 100755 --- a/init/jupyter/jupyter-dependency.sh +++ b/init/jupyter/jupyter-dependency.sh @@ -32,4 +32,3 @@ if [[ ! 
-d "${jupyter_dir}/packages" ]]; then fi ls "${jupyter_dir}/packages/" | xargs -I {} rm "${jupyter_dir}/packages/"{} find "${jupyter_dir}/../spark/packages/" | grep jar | xargs -I {} ln {} "${jupyter_dir}/packages/" - diff --git a/init/jupyter/ranger-spark-security.xml b/init/jupyter/ranger-spark-security.xml new file mode 100644 index 0000000..a1eca15 --- /dev/null +++ b/init/jupyter/ranger-spark-security.xml @@ -0,0 +1,44 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + --> +<configuration> + <property> + <name>ranger.plugin.spark.policy.rest.url</name> + <value>http://ranger:6080</value> + </property> + + <property> + <name>ranger.plugin.spark.service.name</name> + <value>hiveDev</value> + </property> + + <property> + <name>ranger.plugin.spark.policy.cache.dir</name> + <value>/tmp/policycache</value> + </property> + + <property> + <name>ranger.plugin.spark.policy.pollIntervalMs</name> + <value>500</value> + </property> + + <property> + <name>ranger.plugin.spark.policy.source.impl</name> + <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value> + </property> +</configuration> \ No newline at end of file diff --git a/init/jupyter/jupyter-dependency.sh b/init/jupyter/spark-defaults.conf old mode 100755 new mode 100644 similarity index 54% copy from init/jupyter/jupyter-dependency.sh copy to init/jupyter/spark-defaults.conf index 40cfee0..fb79633 --- a/init/jupyter/jupyter-dependency.sh +++ b/init/jupyter/spark-defaults.conf @@ -1,5 +1,3 @@ -#!/bin/bash - # # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file @@ -19,17 +17,14 @@ # under the License. # -jupyter_dir="$(dirname "${BASH_SOURCE-$0}")" -jupyter_dir="$( - cd "${jupyter_dir}" >/dev/null - pwd -)" -. "${jupyter_dir}/../common/common.sh" - -# Prepare download packages -if [[ ! 
-d "${jupyter_dir}/packages" ]]; then - mkdir "${jupyter_dir}/packages" -fi -ls "${jupyter_dir}/packages/" | xargs -I {} rm "${jupyter_dir}/packages/"{} -find "${jupyter_dir}/../spark/packages/" | grep jar | xargs -I {} ln {} "${jupyter_dir}/packages/" - +spark.plugins org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin +spark.sql.gravitino.uri http://gravitino:8090 +spark.sql.gravitino.metalake metalake_demo +spark.sql.gravitino.enableIcebergSupport true +spark.sql.extensions org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +spark.sql.catalog.catalog_rest org.apache.iceberg.spark.SparkCatalog +spark.sql.catalog.catalog_rest.type rest +spark.sql.catalog.catalog_rest.uri http://gravitino:9001/iceberg/ +spark.locality.wait.node 0 +spark.sql.warehouse.dir hdfs://hive:9000/user/hive/warehouse +spark.sql.extensions org.apache.kyuubi.plugin.spark.authz.ranger.RangerSparkExtension diff --git a/init/ranger/hdfsDev.json b/init/ranger/hdfsDev.json new file mode 100755 index 0000000..929f15c --- /dev/null +++ b/init/ranger/hdfsDev.json @@ -0,0 +1,26 @@ +{ + "type" : "hdfs", + "tagUpdateTime" : 12345, + "description" : "...", + "name" : "hdfsDev", + "policyVersion" : 12345, + "policyUpdateTime" : 12345, + "displayName" : "hdfsDev", + "tagVersion" : 12345, + "configs" : { + "username" : "admin", + "password" : "admin", + "hadoop.security.authentication" : "simple", + "hadoop.rpc.protection" : "authentication", + "hadoop.security.authorization" : false, + "fs.default.name" : "hdfs://hive:9000" + }, + "guid" : "101", + "isEnabled" : true, + "createTime" : 12345, + "createdBy" : "...", + "updatedBy" : "...", + "id" : 12345, + "updateTime" : 12345, + "version" : 12345 +} \ No newline at end of file diff --git a/init/ranger/hiveDev.json b/init/ranger/hiveDev.json new file mode 100755 index 0000000..5f76fc6 --- /dev/null +++ b/init/ranger/hiveDev.json @@ -0,0 +1,24 @@ +{ + "type" : "hive", + "tagUpdateTime" : 12345, + "description" : "...", + "name" : "hiveDev", + "policyVersion" : 12345, + "policyUpdateTime" : 12345, + "displayName" : "hiveDev", + "tagVersion" : 12345, + "configs" : { + "username" : "admin", + "password" : "admin", + "jdbc.driverClassName" : "org.apache.hive.jdbc.HiveDriver", + "jdbc.url" : "jdbc:hive2://hive:10000" + }, + "guid" : "100", + "isEnabled" : true, + "createTime" : 12345, + "createdBy" : "...", + "updatedBy" : "...", + "id" : 12345, + "updateTime" : 12345, + "version" : 12345 +} \ No newline at end of file diff --git a/init/ranger/init.sh b/init/ranger/init.sh new file mode 100755 index 0000000..2af345b --- /dev/null +++ b/init/ranger/init.sh @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+#
+
+# Remove the trailing `tail -f /dev/null` command line from `/tmp/start-ranger-services.sh`
+sed -i '$d' /tmp/start-ranger-services.sh
+sed -i '$d' /tmp/start-ranger-services.sh
+/tmp/start-ranger-services.sh
+
+status=0
+while [ $status -ne 1 ]; do
+  status=$(curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X GET http://127.0.0.1:6080/service/public/v2/api/service 2> /dev/null | grep -c '200 OK')
+
+  if [ "$status" -ne '1' ]; then
+    sleep 5
+  fi
+done
+
+curl -iv -u admin:rangerR0cks! -d @/tmp/ranger/hiveDev.json -H "Content-Type: application/json" -X POST http://127.0.0.1:6080/service/public/v2/api/service
+curl -iv -u admin:rangerR0cks! -d @/tmp/ranger/hdfsDev.json -H "Content-Type: application/json" -X POST http://127.0.0.1:6080/service/public/v2/api/service
+curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X DELETE http://localhost:6080/service/plugins/policies/1
+curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X DELETE http://localhost:6080/service/plugins/policies/3
+curl -iv -u admin:rangerR0cks! -H "Content-Type: application/json" -X DELETE http://localhost:6080/service/plugins/policies/4
+tail -f /dev/null
diff --git a/playground.sh b/playground.sh
index a3b4fe4..65828db 100755
--- a/playground.sh
+++ b/playground.sh
@@ -133,7 +133,12 @@ start() {
     ;;
   docker)
     logSuffix=$(date +%Y%m%d%H%m%s)
-    docker-compose up --detach
+    if [ "$enableRanger" == true ]; then
+      docker-compose -f docker-compose.yaml -f docker-hive-override.yaml up --detach
+    else
+      docker-compose up --detach
+    fi
+
     docker compose logs -f >${playground_dir}/playground-${logSuffix}.log 2>&1 &
     echo "Check log details: ${playground_dir}/playground-${logSuffix}.log"
     ;;
@@ -188,6 +193,13 @@ start)
     echo "The playground requires 2 CPU cores, 8 GB of RAM, and 25 GB of disk storage to operate efficiently."
     read -r -p "Confirm the requirement is available in your OS [Y/n]:" input
   fi
+
+  if [[ "$4" == "--enable-ranger" || "$3" == "--enable-ranger" ]]; then
+    enableRanger=true
+  else
+    enableRanger=false
+  fi
+
 case $input in
 [yY][eE][sS] | [yY]) ;;
 [nN][oO] | [nN])
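For reference, the `--enable-ranger` flag above simply layers the Hive/Ranger override file onto the base compose file; a sketch of the equivalent manual invocations, taken directly from the playground.sh change:

```shell
# With Ranger enabled, docker-hive-override.yaml replaces the hive service,
# adds the ranger service, and sets RANGER_ENABLE=true for jupyter:
docker-compose -f docker-compose.yaml -f docker-hive-override.yaml up --detach

# Without the flag, only the base services start:
docker-compose up --detach
```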