Maciej Bryński created SPARK-21439:
--------------------------------------
             Summary: Cannot use Spark with Python ABCMeta (exception from cloudpickle)
                 Key: SPARK-21439
                 URL: https://issues.apache.org/jira/browse/SPARK-21439
             Project: Spark
          Issue Type: Bug
          Components: Spark Core
    Affects Versions: 2.1.1
            Reporter: Maciej Bryński

I'm trying to use classes built with ABCMeta in Spark. The following code raises an exception:

{code}
from abc import ABCMeta, abstractmethod

class A(metaclass=ABCMeta):
    @abstractmethod
    def x(self):
        """Abstract"""

class B(A):
    def x(self):
        return 10

b = B()
sc.range(10).map(lambda x: b.x()).collect()
{code}

Exception:
{code}
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/opt/spark/python/pyspark/cloudpickle.py in dump(self, obj)
    146         try:
--> 147             return Pickler.dump(self, obj)
    148         except RuntimeError as e:

/usr/lib/python3.4/pickle.py in dump(self, obj)
    409             self.framer.start_framing()
--> 410         self.save(obj)
    411         self.write(STOP)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    741             for element in obj:
--> 742                 save(element)
    743 

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name)
    253             if klass is None or klass is not obj:
--> 254                 self.save_function_tuple(obj)
    255                 return

/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func)
    290         save(_make_skel_func)
--> 291         save((code, closure, base_globals))
    292         write(pickle.REDUCE)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    726         for element in obj:
--> 727             save(element)
    728         # Subtle. Same as in the big comment below.

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_list(self, obj)
    771         self.memoize(obj)
--> 772         self._batch_appends(obj)
    773 

/usr/lib/python3.4/pickle.py in _batch_appends(self, items)
    795                 for x in tmp:
--> 796                     save(x)
    797                 write(APPENDS)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name)
    253             if klass is None or klass is not obj:
--> 254                 self.save_function_tuple(obj)
    255                 return

/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func)
    290         save(_make_skel_func)
--> 291         save((code, closure, base_globals))
    292         write(pickle.REDUCE)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    726         for element in obj:
--> 727             save(element)
    728         # Subtle. Same as in the big comment below.

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_list(self, obj)
    771         self.memoize(obj)
--> 772         self._batch_appends(obj)
    773 

/usr/lib/python3.4/pickle.py in _batch_appends(self, items)
    798             elif n:
--> 799                 save(tmp[0])
    800                 write(APPEND)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name)
    247             #print("save global", islambda(obj), obj.__code__.co_filename, modname, themodule)
--> 248             self.save_function_tuple(obj)
    249             return

/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func)
    295         # save the rest of the func data needed by _fill_function
--> 296         save(f_globals)
    297         save(defaults)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_dict(self, obj)
    811         self.memoize(obj)
--> 812         self._batch_setitems(obj.items())
    813 

/usr/lib/python3.4/pickle.py in _batch_setitems(self, items)
    842                 save(k)
--> 843                 save(v)
    844                 write(SETITEM)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    521         # Save the reduce() output and finally memoize the object
--> 522         self.save_reduce(obj=obj, *rv)
    523 

/opt/spark/python/pyspark/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    565             args = args[1:]
--> 566             save(cls)
    567 

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    490         if issc:
--> 491             self.save_global(obj)
    492             return

/opt/spark/python/pyspark/cloudpickle.py in save_global(self, obj, name, pack)
    415             self.save(_load_class)
--> 416             self.save_reduce(typ, (obj.__name__, obj.__bases__, {"__doc__": obj.__doc__}), obj=obj)
    417             d.pop('__doc__', None)

/opt/spark/python/pyspark/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    580         save(func)
--> 581         save(args)
    582         write(pickle.REDUCE)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    726         for element in obj:
--> 727             save(element)
    728         # Subtle. Same as in the big comment below.

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    726         for element in obj:
--> 727             save(element)
    728         # Subtle. Same as in the big comment below.

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    490         if issc:
--> 491             self.save_global(obj)
    492             return

/opt/spark/python/pyspark/cloudpickle.py in save_global(self, obj, name, pack)
    430                 dd[k] = v
--> 431         self.save(dd)
    432         self.write(pickle.TUPLE2)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_dict(self, obj)
    811         self.memoize(obj)
--> 812         self._batch_setitems(obj.items())
    813 

/usr/lib/python3.4/pickle.py in _batch_setitems(self, items)
    837                     save(k)
--> 838                     save(v)
    839                 write(SETITEMS)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    521         # Save the reduce() output and finally memoize the object
--> 522         self.save_reduce(obj=obj, *rv)
    523 

/opt/spark/python/pyspark/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    598         if state is not None:
--> 599             save(state)
    600             write(pickle.BUILD)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_dict(self, obj)
    811         self.memoize(obj)
--> 812         self._batch_setitems(obj.items())
    813 

/usr/lib/python3.4/pickle.py in _batch_setitems(self, items)
    837                     save(k)
--> 838                     save(v)
    839                 write(SETITEMS)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name)
    253             if klass is None or klass is not obj:
--> 254                 self.save_function_tuple(obj)
    255                 return

/opt/spark/python/pyspark/cloudpickle.py in save_function_tuple(self, func)
    296         save(f_globals)
--> 297         save(defaults)
    298         save(dct)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_tuple(self, obj)
    726         for element in obj:
--> 727             save(element)
    728         # Subtle. Same as in the big comment below.

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    521         # Save the reduce() output and finally memoize the object
--> 522         self.save_reduce(obj=obj, *rv)
    523 

/opt/spark/python/pyspark/cloudpickle.py in save_reduce(self, func, args, state, listitems, dictitems, obj)
    565             args = args[1:]
--> 566             save(cls)
    567 

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_global(self, obj, name, pack)
    430                 dd[k] = v
--> 431         self.save(dd)
    432         self.write(pickle.TUPLE2)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/usr/lib/python3.4/pickle.py in save_dict(self, obj)
    811         self.memoize(obj)
--> 812         self._batch_setitems(obj.items())
    813 

/usr/lib/python3.4/pickle.py in _batch_setitems(self, items)
    837                     save(k)
--> 838                     save(v)
    839                 write(SETITEMS)

/usr/lib/python3.4/pickle.py in save(self, obj, save_persistent_id)
    476         if f is not None:
--> 477             f(self, obj) # Call unbound method with explicit self
    478             return

/opt/spark/python/pyspark/cloudpickle.py in save_builtin_function(self, obj)
    366             return self.save_global(obj)
--> 367         return self.save_function(obj)
    368     dispatch[types.BuiltinFunctionType] = save_builtin_function

/opt/spark/python/pyspark/cloudpickle.py in save_function(self, obj, name)
    245         # reference (as is done in default pickler), via save_function_tuple.
--> 246         if islambda(obj) or obj.__code__.co_filename == '<stdin>' or themodule is None:
    247             #print("save global", islambda(obj), obj.__code__.co_filename, modname, themodule)

AttributeError: 'builtin_function_or_method' object has no attribute '__code__'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-8-9ea6e84ab4cc> in <module>()
----> 1 sc.range(10).map(lambda x: b.x()).collect()

/opt/spark/python/pyspark/rdd.py in collect(self)
    806         """
    807         with SCCallSiteSync(self.context) as css:
--> 808             port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    809         return list(_load_from_socket(port, self._jrdd_deserializer))
    810 

/opt/spark/python/pyspark/rdd.py in _jrdd(self)
   2438 
   2439         wrapped_func = _wrap_function(self.ctx, self.func, self._prev_jrdd_deserializer,
-> 2440                                       self._jrdd_deserializer, profiler)
   2441         python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), wrapped_func,
   2442                                              self.preservesPartitioning)

/opt/spark/python/pyspark/rdd.py in _wrap_function(sc, func, deserializer, serializer, profiler)
   2371     assert serializer, "serializer should not be empty"
   2372     command = (func, profiler, deserializer, serializer)
-> 2373     pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
   2374     return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
   2375                                   sc.pythonVer, broadcast_vars, sc._javaAccumulator)

/opt/spark/python/pyspark/rdd.py in _prepare_for_python_RDD(sc, command)
   2357     # the serialized command will be compressed by broadcast
   2358     ser = CloudPickleSerializer()
-> 2359     pickled_command = ser.dumps(command)
   2360     if len(pickled_command) > (1 << 20):  # 1M
   2361         # The broadcast will have same life cycle as created PythonRDD

/opt/spark/python/pyspark/serializers.py in dumps(self, obj)
    458 
    459     def dumps(self, obj):
--> 460         return cloudpickle.dumps(obj, 2)
    461 
    462 

/opt/spark/python/pyspark/cloudpickle.py in dumps(obj, protocol)
    701 
    702     cp = CloudPickler(file,protocol)
--> 703     cp.dump(obj)
    704 
    705     return file.getvalue()

/opt/spark/python/pyspark/cloudpickle.py in dump(self, obj)
    153             raise
    154         except Exception as e:
--> 155             if "'i' format requires" in e.message:
    156                 msg = "Object too large to serialize: " + e.message
    157             else:

AttributeError: 'AttributeError' object has no attribute 'message'
{code}
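For what it's worth, the failure can be reproduced without running a Spark job at all by calling the bundled cloudpickle directly. Below is a minimal sketch, assuming pyspark 2.1.1 is on the Python path so its vendored pickler is importable as pyspark.cloudpickle; the traceback above shows save_function unconditionally reading obj.__code__, an attribute that the builtin_function_or_method objects reached through the ABCMeta-created class dict do not have.

{code}
# Minimal sketch reproducing the failure without a Spark job.
# Assumption: pyspark 2.1.1 is importable, so its vendored pickler is
# available as pyspark.cloudpickle (the module shown in the traceback).
from abc import ABCMeta, abstractmethod

from pyspark import cloudpickle


class A(metaclass=ABCMeta):
    @abstractmethod
    def x(self):
        """Abstract"""


class B(A):
    def x(self):
        return 10


b = B()
# Pickling the closure pulls in b, hence B and its ABCMeta machinery;
# on the affected version this raises the same
# AttributeError: 'builtin_function_or_method' object has no attribute '__code__'
cloudpickle.dumps(lambda x: b.x(), 2)
{code}

A common workaround until the pickler is fixed is to define A and B in a module shipped to the executors (e.g. via --py-files), so cloudpickle serializes the classes by reference instead of by value.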
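Note also that the final error ({{'AttributeError' object has no attribute 'message'}}) comes from the error handler itself: e.message is a Python 2 idiom that does not exist on Python 3 exceptions, so the except clause in cloudpickle's dump masks the real pickling failure. A small illustration of the masking and the portable alternative (a sketch, not necessarily the committed fix):

{code}
# Python 3 exceptions have no .message attribute, so touching e.message
# inside the except block raises a second AttributeError that hides the
# original error. str(e) works on both Python 2 and 3.
try:
    raise AttributeError(
        "'builtin_function_or_method' object has no attribute '__code__'")
except Exception as e:
    # e.message        # would itself raise AttributeError on Python 3
    print(str(e))      # portable: prints the original message
{code}

With the handler using str(e), the underlying __code__ failure would at least surface directly instead of being hidden behind the secondary AttributeError.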