- Component: Spark Delta, Spark SQL
   - Level: Beginner
   - Scenario: Debug, How-to

*Python in Jupyter:*

import pyspark
import pyspark.sql.functions

from pyspark.sql import SparkSession
# Build (or reuse) a local SparkSession wired up for Delta Lake:
# the delta-core package, the Delta SQL extension, and the Delta catalog.
_delta_conf = {
    "spark.jars.packages": "io.delta:delta-core_2.12:0.7.0",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.ui.port": "4050",
}

_builder = SparkSession.builder.appName("programming").master("local")
for _key, _value in _delta_conf.items():
    _builder = _builder.config(_key, _value)
spark = _builder.getOrCreate()
from delta import *

# Sample CSV payload (header row included) for the 2021-06-09 load.
string_20210609 = (
    "worked_date,worker_id,delete_flag,hours_worked\n"
    "2021-06-09,1001,Y,7\n"
    "2021-06-09,1002,Y,3.75\n"
    "2021-06-09,1003,Y,7.5\n"
    "2021-06-09,1004,Y,6.25"
)

# One RDD element per CSV line; splitlines() matches split('\n') here
# because the payload has no trailing newline.
rdd_20210609 = spark.sparkContext.parallelize(string_20210609.splitlines())

# Files will show up on the left under the folder icon if you want to
# browse them.  (NOTE: this comment was previously word-wrapped by the mail
# client, leaving "BROWSE THEM" as a bare statement -> NameError at runtime.)
OUTPUT_DELTA_PATH = './output/delta/'

# The reported failure —
#   AnalysisException: Cannot create table ('`EXERCISE`.`WORKED_HOURS`').
#   The associated location ('output/delta') is not empty.
# — happens when the LOCATION directory already holds files that are NOT a
# Delta table (no _delta_log/ directory), typically leftovers from an earlier
# run that wrote plain parquet/CSV there.  Remove such stale output before
# CREATE TABLE; a directory that already IS a Delta table is left untouched
# so CREATE TABLE IF NOT EXISTS can adopt it.
import shutil
from pathlib import Path

_delta_dir = Path(OUTPUT_DELTA_PATH)
if _delta_dir.exists() and not (_delta_dir / '_delta_log').exists():
    shutil.rmtree(_delta_dir)

spark.sql('CREATE DATABASE IF NOT EXISTS EXERCISE')

spark.sql('''
    CREATE TABLE IF NOT EXISTS EXERCISE.WORKED_HOURS(
        worked_date date
        , worker_id int
        , delete_flag string
        , hours_worked double
    ) USING DELTA
    PARTITIONED BY (worked_date)
    LOCATION "{0}"
    '''.format(OUTPUT_DELTA_PATH)
)

*Error Message:*

AnalysisException                         Traceback (most recent call
last)<ipython-input-13-e0469b5852dd> in <module>      4
spark.sql('CREATE DATABASE IF NOT EXISTS EXERCISE')      5 ----> 6
spark.sql('''      7     CREATE TABLE IF NOT EXISTS
EXERCISE.WORKED_HOURS(      8         worked_date date
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\session.py
in sql(self, sqlQuery)    647         [Row(f1=1, f2=u'row1'),
Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]    648         """-->
649         return DataFrame(self._jsparkSession.sql(sqlQuery),
self._wrapped)    650     651     @since(2.0)
\Users\kyjan\spark-3.0.3-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py
in __call__(self, *args)   1302    1303         answer =
self.gateway_client.send_command(command)-> 1304         return_value
= get_return_value(   1305             answer, self.gateway_client,
self.target_id, self.name)   1306
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\utils.py in
deco(*a, **kw)    132                 # Hide where the exception came
from that shows a non-Pythonic    133                 # JVM exception
message.--> 134                 raise_from(converted)    135
  else:    136                 raise
/Users/kyjan/spark-3.0.3-bin-hadoop2.7\python\pyspark\sql\utils.py in
raise_from(e)
AnalysisException: Cannot create table ('`EXERCISE`.`WORKED_HOURS`').
The associated location ('output/delta') is not empty.;


-- 
Best Wishes,
Kumba Janga

"The only way of finding the limits of the possible is by going beyond them
into the impossible"
-Arthur C. Clarke

Reply via email to