I get a slight different error when not specifying a schema: Traceback (most recent call last): File "/home/centos/fun-functions/spark-parrallel-read-from-s3/tick.py", line 61, in <module> df = sqlContext.createDataFrame(foo) File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/context.py", line 299, in createDataFrame File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py", line 520, in createDataFrame File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py", line 360, in _createFromRDD File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py", line 331, in _inferSchema File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line 1328, in first File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line 1310, in take File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/context.py", line 941, in runJob File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line 2403, in _jrdd File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line 2336, in _wrap_function File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", line 2315, in _prepare_for_python_RDD File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/serializers.py", line 428, in dumps File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 657, in dumps File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 107, in dump File "/usr/lib64/python2.7/pickle.py", line 224, in dump self.save(obj) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 562, in save_tuple save(element) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 204, in save_function File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 241, in save_function_tuple File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple save(element) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 600, in save_list self._batch_appends(iter(obj)) File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends save(x) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 204, in save_function File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 241, in save_function_tuple File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple save(element) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 600, in save_list self._batch_appends(iter(obj)) File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends save(x) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 204, in save_function File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 241, in save_function_tuple File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple save(element) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 600, in save_list self._batch_appends(iter(obj)) File "/usr/lib64/python2.7/pickle.py", line 636, in _batch_appends save(tmp[0]) File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 198, in save_function File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 246, in save_function_tuple File "/usr/lib64/python2.7/pickle.py", line 286, in save f(self, obj) # Call unbound method with explicit self File "/usr/lib64/python2.7/pickle.py", line 649, in save_dict self._batch_setitems(obj.iteritems()) File "/usr/lib64/python2.7/pickle.py", line 681, in _batch_setitems save(v) File "/usr/lib64/python2.7/pickle.py", line 306, in save rv = reduce(self.proto) File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py", line 933, in __call__ File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py", line 316, in get_return_value py4j.protocol.Py4JError: An error occurred while calling o33.__getnewargs__. Trace: py4j.Py4JException: Method __getnewargs__([]) does not exist at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318) at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326) at py4j.Gateway.invoke(Gateway.java:272) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:211) at java.lang.Thread.run(Thread.java:745)
On Sun, Nov 27, 2016 at 8:32 PM, Andrew Holway < andrew.hol...@otternetworks.de> wrote: > Hi, > > Can anyone tell me what is causing this error > Spark 2.0.0 > Python 2.7.5 > > df = sqlContext.createDataFrame(foo, schema) > https://gist.github.com/mooperd/368e3453c29694c8b2c038d6b7b4413a > > Traceback (most recent call last): > File "/home/centos/fun-functions/spark-parrallel-read-from-s3/tick.py", > line 61, in <module> > df = sqlContext.createDataFrame(foo, schema) > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/context.py", > line 299, in createDataFrame > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/session.py", > line 523, in createDataFrame > File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", > line 2220, in _to_java_object_rdd > File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", > line 2403, in _jrdd > File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", > line 2336, in _wrap_function > File "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/rdd.py", > line 2315, in _prepare_for_python_RDD > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/serializers.py", > line 428, in dumps > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 657, in dumps > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 107, in dump > File "/usr/lib64/python2.7/pickle.py", line 224, in dump > self.save(obj) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 562, in save_tuple > save(element) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 204, in save_function > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 241, in save_function_tuple > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple > save(element) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 600, in save_list > self._batch_appends(iter(obj)) > File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends > save(x) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 204, in save_function > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 241, in save_function_tuple > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple > save(element) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 600, in save_list > self._batch_appends(iter(obj)) > File "/usr/lib64/python2.7/pickle.py", line 633, in _batch_appends > save(x) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 204, in save_function > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 241, in save_function_tuple > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 548, in save_tuple > save(element) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 600, in save_list > self._batch_appends(iter(obj)) > File "/usr/lib64/python2.7/pickle.py", line 636, in _batch_appends > save(tmp[0]) > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 198, in save_function > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/cloudpickle.py", > line 246, in save_function_tuple > File "/usr/lib64/python2.7/pickle.py", line 286, in save > f(self, obj) # Call unbound method with explicit self > File "/usr/lib64/python2.7/pickle.py", line 649, in save_dict > self._batch_setitems(obj.iteritems()) > File "/usr/lib64/python2.7/pickle.py", line 681, in _batch_setitems > save(v) > File "/usr/lib64/python2.7/pickle.py", line 306, in save > rv = reduce(self.proto) > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py", > line 933, in __call__ > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/pyspark.zip/pyspark/sql/utils.py", > line 63, in deco > File > "/usr/hdp/2.5.0.0-1245/spark2/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py", > line 316, in get_return_value > py4j.protocol.Py4JError: An error occurred while calling > o33.__getnewargs__. Trace: > py4j.Py4JException: Method __getnewargs__([]) does not exist > at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318) > at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326) > at py4j.Gateway.invoke(Gateway.java:272) > at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128) > at py4j.commands.CallCommand.execute(CallCommand.java:79) > at py4j.GatewayConnection.run(GatewayConnection.java:211) > at java.lang.Thread.run(Thread.java:745) > > > > -- > Otter Networks UG > http://otternetworks.de > Gotenstraße 17 > 10829 Berlin > -- Otter Networks UG http://otternetworks.de Gotenstraße 17 10829 Berlin