Package: release.debian.org Severity: normal Tags: jessie User: release.debian....@packages.debian.org Usertags: pu
Hello stable team, The version of ansible currently in Debian Jessie has a few minor security vulnerabilities in it. I spoke with the security team, and we all agree that the vulnerabilities don't rise to the level where a DSA and security upload is necessary. The resolved security issues are TEMP-0000000-EDD657 and CVE-2015-3908. I've backported the fixes from the current upstream stable branch back to the version in Debian Jessie. Please let me know if it is clear for me to upload to stable. -- System Information: Debian Release: stretch/sid APT prefers testing APT policy: (900, 'testing') Architecture: amd64 (x86_64) Foreign Architectures: i386 Kernel: Linux 4.0.0-2-amd64 (SMP w/8 CPU cores) Locale: LANG=en_US.utf8, LC_CTYPE=en_US.utf8 (charmap=UTF-8) *** /tmp/debdiff.txt agartha 福 ~/Debian/ansible 10128 ◯ : debdiff --from ansible_1.7.2+dfsg-2_all.deb --to ansible_1.7.2+dfsg-2+deb8u1_all.deb [The following lists of changes regard files as different if they have different names, permissions or owners.] Files in second set of .debs but not in first --------------------------------------------- -rw-r--r-- root/root /usr/lib/python2.7/dist-packages/ansible/utils/unicode.py Control files: lines which differ (wdiff format) ------------------------------------------------ Installed-Size: [-3400-] {+3415+} Version: [-1.7.2+dfsg-2-] {+1.7.2+dfsg-2+deb8u1+}
diff --git a/lib/ansible/module_utils/urls.py b/lib/ansible/module_utils/urls.py index 4edab47..ced7239 100644 --- a/lib/ansible/module_utils/urls.py +++ b/lib/ansible/module_utils/urls.py @@ -50,6 +50,15 @@ try: except: HAS_SSL=False +HAS_MATCH_HOSTNAME = True +try: + from ssl import match_hostname, CertificateError +except ImportError: + try: + from backports.ssl_match_hostname import match_hostname, CertificateError + except ImportError: + HAS_MATCH_HOSTNAME = False + import os import re import socket @@ -244,11 +253,13 @@ class SSLValidationHandler(urllib2.BaseHandler): connect_result = s.recv(4096) self.validate_proxy_response(connect_result) ssl_s = ssl.wrap_socket(s, ca_certs=tmp_ca_cert_path, cert_reqs=ssl.CERT_REQUIRED) + match_hostname(ssl_s.getpeercert(), self.hostname) else: self.module.fail_json(msg='Unsupported proxy scheme: %s. Currently ansible only supports HTTP proxies.' % proxy_parts.get('scheme')) else: s.connect((self.hostname, self.port)) ssl_s = ssl.wrap_socket(s, ca_certs=tmp_ca_cert_path, cert_reqs=ssl.CERT_REQUIRED) + match_hostname(ssl_s.getpeercert(), self.hostname) # close the ssl connection #ssl_s.unwrap() s.close() @@ -262,6 +273,9 @@ class SSLValidationHandler(urllib2.BaseHandler): 'Use validate_certs=no or make sure your managed systems have a valid CA certificate installed. ' + \ 'Paths checked for this platform: %s' % ", ".join(paths_checked) ) + except CertificateError: + self.module.fail_json(msg="SSL Certificate does not belong to %s. 
Make sure the url has a certificate that belongs to it or use validate_certs=no (insecure)" % self.hostname) + try: # cleanup the temp file created, don't worry # if it fails for some reason @@ -290,7 +304,7 @@ def url_argument_spec(): ) -def fetch_url(module, url, data=None, headers=None, method=None, +def fetch_url(module, url, data=None, headers=None, method=None, use_proxy=True, force=False, last_mod_time=None, timeout=10): ''' Fetches a file from an HTTP/FTP server using urllib2 @@ -314,27 +328,30 @@ def fetch_url(module, url, data=None, headers=None, method=None, # FIXME: change the following to use the generic_urlparse function # to remove the indexed references for 'parsed' parsed = urlparse.urlparse(url) - if parsed[0] == 'https': - if not HAS_SSL and validate_certs: + if parsed[0] == 'https' and validate_certs: + if not HAS_SSL: if distribution == 'Redhat': module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended. You can also install python-ssl from EPEL') else: module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended') - elif validate_certs: - # do the cert validation - netloc = parsed[1] - if '@' in netloc: - netloc = netloc.split('@', 1)[1] - if ':' in netloc: - hostname, port = netloc.split(':', 1) - else: - hostname = netloc - port = 443 - # create the SSL validation handler and - # add it to the list of handlers - ssl_handler = SSLValidationHandler(module, hostname, port) - handlers.append(ssl_handler) + if not HAS_MATCH_HOSTNAME: + module.fail_json(msg='Available SSL validation does not check that the certificate matches the hostname. You can install backports.ssl_match_hostname or update your managed machine to python-2.7.9 or newer. 
You could also use validate_certs=no, however this is unsafe and not recommended') + + # do the cert validation + netloc = parsed[1] + if '@' in netloc: + netloc = netloc.split('@', 1)[1] + if ':' in netloc: + hostname, port = netloc.split(':', 1) + port = int(port) + else: + hostname = netloc + port = 443 + # create the SSL validation handler and + # add it to the list of handlers + ssl_handler = SSLValidationHandler(module, hostname, port) + handlers.append(ssl_handler) if parsed[0] != 'ftp': username = module.params.get('url_username', '') @@ -383,11 +400,11 @@ def fetch_url(module, url, data=None, headers=None, method=None, else: request = urllib2.Request(url, data) - # add the custom agent header, to help prevent issues - # with sites that block the default urllib agent string + # add the custom agent header, to help prevent issues + # with sites that block the default urllib agent string request.add_header('User-agent', module.params.get('http_agent')) - # if we're ok with getting a 304, set the timestamp in the + # if we're ok with getting a 304, set the timestamp in the # header, otherwise make sure we don't get a cached copy if last_mod_time and not force: tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000') @@ -419,4 +436,3 @@ def fetch_url(module, url, data=None, headers=None, method=None, info.update(dict(msg="Request failed: %s" % str(e), status=code)) return r, info - diff --git a/lib/ansible/runner/connection_plugins/chroot.py b/lib/ansible/runner/connection_plugins/chroot.py index 38c8af7..6b5b677 100644 --- a/lib/ansible/runner/connection_plugins/chroot.py +++ b/lib/ansible/runner/connection_plugins/chroot.py @@ -1,5 +1,6 @@ # Based on local.py (c) 2012, Michael DeHaan <michael.deh...@gmail.com> # (c) 2013, Maykel Moya <mm...@speedyrails.com> +# (c) 2015, Toshio Kuratomi <tkurat...@ansible.com> # # This file is part of Ansible # @@ -15,16 +16,21 @@ # # You should have received a copy of the GNU General Public License # along with Ansible. 
If not, see <http://www.gnu.org/licenses/>. +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type import distutils.spawn import traceback import os -import shutil +import shlex import subprocess from ansible import errors from ansible import utils +from ansible.utils.uniode import to_bytes from ansible.callbacks import vvv +BUFSIZE = 65536 + class Connection(object): ''' Local chroot based connections ''' @@ -60,8 +66,25 @@ class Connection(object): return self - def exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None): - ''' run a command on the chroot ''' + def _generate_cmd(self, executable, cmd): + if executable: + local_cmd = [self.chroot_cmd, self.chroot, executable, '-c', cmd] + else: + # Prev to python2.7.3, shlex couldn't handle unicode type strings + cmd = to_bytes(cmd) + cmd = shlex.split(cmd) + local_cmd = [self.chroot_cmd, self.chroot] + local_cmd += cmd + return local_cmd + + def _buffered_exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None, stdin=subprocess.PIPE): + ''' run a command on the chroot. This is only needed for implementing + put_file() get_file() so that we don't have to read the whole file + into memory. + + compared to exec_command() it looses some niceties like being able to + return the process's exit code immediately. 
+ ''' if su or su_user: raise errors.AnsibleError("Internal Error: this module does not support running commands via su") @@ -70,60 +93,68 @@ class Connection(object): raise errors.AnsibleError("Internal Error: this module does not support optimized module pipelining") # We enter chroot as root so sudo stuff can be ignored - - if executable: - local_cmd = [self.chroot_cmd, self.chroot, executable, '-c', cmd] - else: - local_cmd = '%s "%s" %s' % (self.chroot_cmd, self.chroot, cmd) + local_cmd = self._generate_cmd(executable, cmd) vvv("EXEC %s" % (local_cmd), host=self.chroot) - p = subprocess.Popen(local_cmd, shell=isinstance(local_cmd, basestring), + p = subprocess.Popen(local_cmd, shell=False, cwd=self.runner.basedir, - stdin=subprocess.PIPE, + stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return p + + def exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None): + ''' run a command on the chroot ''' + + p = self._buffered_exec_command(cmd, tmp_path, become_user, sudoable, executable, in_data) + stdout, stderr = p.communicate() return (p.returncode, '', stdout, stderr) def put_file(self, in_path, out_path): ''' transfer a file from local to chroot ''' - if not out_path.startswith(os.path.sep): - out_path = os.path.join(os.path.sep, out_path) - normpath = os.path.normpath(out_path) - out_path = os.path.join(self.chroot, normpath[1:]) - vvv("PUT %s TO %s" % (in_path, out_path), host=self.chroot) - if not os.path.exists(in_path): - raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path) + try: - shutil.copyfile(in_path, out_path) - except shutil.Error: - traceback.print_exc() - raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path)) + with open(in_path, 'rb') as in_file: + try: + p = self._buffered_exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), None, stdin=in_file) + except OSError: + raise errors.AnsibleError("chroot connection requires dd 
command in the chroot") + try: + stdout, stderr = p.communicate() + except: + traceback.print_exc() + raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path)) + if p.returncode != 0: + raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr)) except IOError: - traceback.print_exc() - raise errors.AnsibleError("failed to transfer file to %s" % out_path) + raise errors.AnsibleError("file or module does not exist at: %s" % in_path) def fetch_file(self, in_path, out_path): ''' fetch a file from chroot to local ''' - if not in_path.startswith(os.path.sep): - in_path = os.path.join(os.path.sep, in_path) - normpath = os.path.normpath(in_path) - in_path = os.path.join(self.chroot, normpath[1:]) - vvv("FETCH %s TO %s" % (in_path, out_path), host=self.chroot) - if not os.path.exists(in_path): - raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path) + try: - shutil.copyfile(in_path, out_path) - except shutil.Error: - traceback.print_exc() - raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path)) - except IOError: - traceback.print_exc() - raise errors.AnsibleError("failed to transfer file to %s" % out_path) + p = self._buffered_exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), None) + except OSError: + raise errors.AnsibleError("chroot connection requires dd command in the chroot") + + with open(out_path, 'wb+') as out_file: + try: + chunk = p.stdout.read(BUFSIZE) + while chunk: + out_file.write(chunk) + chunk = p.stdout.read(BUFSIZE) + except: + traceback.print_exc() + raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path)) + stdout, stderr = p.communicate() + if p.returncode != 0: + raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr)) + def close(self): ''' terminate the connection; nothing to do here ''' diff --git 
a/lib/ansible/runner/connection_plugins/jail.py b/lib/ansible/runner/connection_plugins/jail.py index b721ad6..685349e 100644 --- a/lib/ansible/runner/connection_plugins/jail.py +++ b/lib/ansible/runner/connection_plugins/jail.py @@ -1,6 +1,7 @@ # Based on local.py (c) 2012, Michael DeHaan <michael.deh...@gmail.com> # and chroot.py (c) 2013, Maykel Moya <mm...@speedyrails.com> # (c) 2013, Michael Scherer <m...@zarb.org> +# (c) 2015, Toshio Kuratomi <tkurat...@ansible.com> # # This file is part of Ansible # @@ -16,17 +17,22 @@ # # You should have received a copy of the GNU General Public License # along with Ansible. If not, see <http://www.gnu.org/licenses/>. +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type import distutils.spawn import traceback import os -import shutil +import shlex import subprocess from ansible import errors +from ansible.utils.unicode import to_bytes from ansible.callbacks import vvv +BUFSIZE = 65536 + class Connection(object): - ''' Local chroot based connections ''' + ''' Local BSD Jail based connections ''' def _search_executable(self, executable): cmd = distutils.spawn.find_executable(executable) @@ -54,8 +60,6 @@ class Connection(object): # remove \n return stdout[:-1] - - def __init__(self, runner, host, port, *args, **kwargs): self.jail = host self.runner = runner @@ -67,7 +71,7 @@ class Connection(object): self.jls_cmd = self._search_executable('jls') self.jexec_cmd = self._search_executable('jexec') - + if not self.jail in self.list_jails(): raise errors.AnsibleError("incorrect jail name %s" % self.jail) @@ -77,9 +81,9 @@ class Connection(object): self.port = port def connect(self, port=None): - ''' connect to the chroot; nothing to do here ''' + ''' connect to the jail; nothing to do here ''' - vvv("THIS IS A LOCAL CHROOT DIR", host=self.jail) + vvv("THIS IS A LOCAL JAIL DIR", host=self.jail) return self @@ -88,11 +92,21 @@ class Connection(object): if executable: local_cmd = [self.jexec_cmd, 
self.jail, executable, '-c', cmd] else: - local_cmd = '%s "%s" %s' % (self.jexec_cmd, self.jail, cmd) + # Prev to python2.7.3, shlex couldn't handle unicode type strings + cmd = to_bytes(cmd) + cmd = shlex.split(cmd) + local_cmd = [self.jexec_cmd, self.jail] + local_cmd += cmd return local_cmd - def exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None): - ''' run a command on the chroot ''' + def _buffered_exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None, stdin=subprocess.PIPE): + '''run a command on the jail. This is only needed for implementing + put_file() get_file() so that we don't have to read the whole + file into memory. + + compared to the exec_command() it looses some niceties like + being able to return the process' exit code immediately. + ''' if su or su_user: raise errors.AnsibleError("Internal Error: this module does not support running commands via su") @@ -104,47 +118,63 @@ class Connection(object): local_cmd = self._generate_cmd(executable, cmd) vvv("EXEC %s" % (local_cmd), host=self.jail) - p = subprocess.Popen(local_cmd, shell=isinstance(local_cmd, basestring), + p = subprocess.Popen(local_cmd, shell=False, cwd=self.runner.basedir, - stdin=subprocess.PIPE, + stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return p - stdout, stderr = p.communicate() - return (p.returncode, '', stdout, stderr) + def exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None): + ''' run a command on the jail ''' - def _normalize_path(self, path, prefix): - if not path.startswith(os.path.sep): - path = os.path.join(os.path.sep, path) - normpath = os.path.normpath(path) - return os.path.join(prefix, normpath[1:]) + p = self._buffered_exec_command(cmd, tmp_path, become_user, sudoable, executable, in_data) - def _copy_file(self, in_path, out_path): - if not 
os.path.exists(in_path): - raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path) - try: - shutil.copyfile(in_path, out_path) - except shutil.Error: - traceback.print_exc() - raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path)) - except IOError: - traceback.print_exc() - raise errors.AnsibleError("failed to transfer file to %s" % out_path) + stdout, stderr = p.communicate() + return (p.returncode, '', stdout, stderr) def put_file(self, in_path, out_path): - ''' transfer a file from local to chroot ''' + ''' transfer a file from local to jail ''' - out_path = self._normalize_path(out_path, self.get_jail_path()) vvv("PUT %s TO %s" % (in_path, out_path), host=self.jail) - self._copy_file(in_path, out_path) + try: + with open(in_path, 'rb') as in_file: + try: + p = self._buffered_exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), None, stdin=in_file) + except OSError: + raise errors.AnsibleError("jail connection requires dd command in the jail") + try: + stdout, stderr = p.communicate() + except: + traceback.print_exc() + raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path)) + if p.returncode != 0: + raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr)) + except IOError: + raise errors.AnsibleError("file or module does not exist at: %s" % in_path) def fetch_file(self, in_path, out_path): - ''' fetch a file from chroot to local ''' + ''' fetch a file from jail to local ''' - in_path = self._normalize_path(in_path, self.get_jail_path()) vvv("FETCH %s TO %s" % (in_path, out_path), host=self.jail) - self._copy_file(in_path, out_path) + try: + p = self._buffered_exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), None) + except OSError: + raise errors.AnsibleError("jail connection requires dd command in the jail") + + with open(out_path, 'wb+') as out_file: + try: + chunk = p.stdout.read(BUFSIZE) + while chunk: + 
out_file.write(chunk) + chunk = p.stdout.read(BUFSIZE) + except: + traceback.print_exc() + raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path)) + stdout, stderr = p.communicate() + if p.returncode != 0: + raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr)) def close(self): ''' terminate the connection; nothing to do here ''' diff --git a/lib/ansible/utils/unicode.py b/lib/ansible/utils/unicode.py new file mode 100644 index 0000000..2cff2e5 --- /dev/null +++ b/lib/ansible/utils/unicode.py @@ -0,0 +1,253 @@ +# (c) 2012-2014, Toshio Kuraotmi <a.bad...@gmail.com> +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see <http://www.gnu.org/licenses/>. 
+ +# Make coding more python3-ish +from __future__ import (absolute_import, division, print_function) +__metaclass__ = type + +from six import string_types, text_type, binary_type, PY3 + +# to_bytes and to_unicode were written by Toshio Kuratomi for the +# python-kitchen library https://pypi.python.org/pypi/kitchen +# They are licensed in kitchen under the terms of the GPLv2+ +# They were copied and modified for use in ansible by Toshio in Jan 2015 +# (simply removing the deprecated features) + +#: Aliases for the utf-8 codec +_UTF8_ALIASES = frozenset(('utf-8', 'UTF-8', 'utf8', 'UTF8', 'utf_8', 'UTF_8', + 'utf', 'UTF', 'u8', 'U8')) +#: Aliases for the latin-1 codec +_LATIN1_ALIASES = frozenset(('latin-1', 'LATIN-1', 'latin1', 'LATIN1', + 'latin', 'LATIN', 'l1', 'L1', 'cp819', 'CP819', '8859', 'iso8859-1', + 'ISO8859-1', 'iso-8859-1', 'ISO-8859-1')) + +# EXCEPTION_CONVERTERS is defined below due to using to_unicode + +if PY3: + basestring = (str, bytes) + +def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None): + '''Convert an object into a :class:`unicode` string + + :arg obj: Object to convert to a :class:`unicode` string. This should + normally be a byte :class:`str` + :kwarg encoding: What encoding to try converting the byte :class:`str` as. + Defaults to :term:`utf-8` + :kwarg errors: If errors are found while decoding, perform this action. + Defaults to ``replace`` which replaces the invalid bytes with + a character that means the bytes were unable to be decoded. Other + values are the same as the error handling schemes in the `codec base + classes + <http://docs.python.org/library/codecs.html#codec-base-classes>`_. + For instance ``strict`` which raises an exception and ``ignore`` which + simply omits the non-decodable characters. + :kwarg nonstring: How to treat nonstring values. Possible values are: + + :simplerepr: Attempt to call the object's "simple representation" + method and return that value. 
Python-2.3+ has two methods that + try to return a simple representation: :meth:`object.__unicode__` + and :meth:`object.__str__`. We first try to get a usable value + from :meth:`object.__unicode__`. If that fails we try the same + with :meth:`object.__str__`. + :empty: Return an empty :class:`unicode` string + :strict: Raise a :exc:`TypeError` + :passthru: Return the object unchanged + :repr: Attempt to return a :class:`unicode` string of the repr of the + object + + Default is ``simplerepr`` + + :raises TypeError: if :attr:`nonstring` is ``strict`` and + a non-:class:`basestring` object is passed in or if :attr:`nonstring` + is set to an unknown value + :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and + :attr:`obj` is not decodable using the given encoding + :returns: :class:`unicode` string or the original object depending on the + value of :attr:`nonstring`. + + Usually this should be used on a byte :class:`str` but it can take both + byte :class:`str` and :class:`unicode` strings intelligently. Nonstring + objects are handled in different ways depending on the setting of the + :attr:`nonstring` parameter. + + The default values of this function are set so as to always return + a :class:`unicode` string and never raise an error when converting from + a byte :class:`str` to a :class:`unicode` string. However, when you do + not pass validly encoded text (or a nonstring object), you may end up with + output that you don't expect. Be sure you understand the requirements of + your data, not just ignore errors by passing it through this function. 
+ ''' + # Could use isbasestring/isunicode here but we want this code to be as + # fast as possible + if isinstance(obj, basestring): + if isinstance(obj, text_type): + return obj + if encoding in _UTF8_ALIASES: + return text_type(obj, 'utf-8', errors) + if encoding in _LATIN1_ALIASES: + return text_type(obj, 'latin-1', errors) + return obj.decode(encoding, errors) + + if not nonstring: + nonstring = 'simplerepr' + if nonstring == 'empty': + return u'' + elif nonstring == 'passthru': + return obj + elif nonstring == 'simplerepr': + try: + simple = obj.__unicode__() + except (AttributeError, UnicodeError): + simple = None + if not simple: + try: + simple = text_type(obj) + except UnicodeError: + try: + simple = obj.__str__() + except (UnicodeError, AttributeError): + simple = u'' + if isinstance(simple, binary_type): + return text_type(simple, encoding, errors) + return simple + elif nonstring in ('repr', 'strict'): + obj_repr = repr(obj) + if isinstance(obj_repr, binary_type): + obj_repr = text_type(obj_repr, encoding, errors) + if nonstring == 'repr': + return obj_repr + raise TypeError('to_unicode was given "%(obj)s" which is neither' + ' a byte string (str) or a unicode string' % + {'obj': obj_repr.encode(encoding, 'replace')}) + + raise TypeError('nonstring value, %(param)s, is not set to a valid' + ' action' % {'param': nonstring}) + +def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None): + '''Convert an object into a byte :class:`str` + + :arg obj: Object to convert to a byte :class:`str`. This should normally + be a :class:`unicode` string. + :kwarg encoding: Encoding to use to convert the :class:`unicode` string + into a byte :class:`str`. Defaults to :term:`utf-8`. + :kwarg errors: If errors are found while encoding, perform this action. + Defaults to ``replace`` which replaces the invalid bytes with + a character that means the bytes were unable to be encoded. 
Other + values are the same as the error handling schemes in the `codec base + classes + <http://docs.python.org/library/codecs.html#codec-base-classes>`_. + For instance ``strict`` which raises an exception and ``ignore`` which + simply omits the non-encodable characters. + :kwarg nonstring: How to treat nonstring values. Possible values are: + + :simplerepr: Attempt to call the object's "simple representation" + method and return that value. Python-2.3+ has two methods that + try to return a simple representation: :meth:`object.__unicode__` + and :meth:`object.__str__`. We first try to get a usable value + from :meth:`object.__str__`. If that fails we try the same + with :meth:`object.__unicode__`. + :empty: Return an empty byte :class:`str` + :strict: Raise a :exc:`TypeError` + :passthru: Return the object unchanged + :repr: Attempt to return a byte :class:`str` of the :func:`repr` of the + object + + Default is ``simplerepr``. + + :raises TypeError: if :attr:`nonstring` is ``strict`` and + a non-:class:`basestring` object is passed in or if :attr:`nonstring` + is set to an unknown value. + :raises UnicodeEncodeError: if :attr:`errors` is ``strict`` and all of the + bytes of :attr:`obj` are unable to be encoded using :attr:`encoding`. + :returns: byte :class:`str` or the original object depending on the value + of :attr:`nonstring`. + + .. warning:: + + If you pass a byte :class:`str` into this function the byte + :class:`str` is returned unmodified. It is **not** re-encoded with + the specified :attr:`encoding`. The easiest way to achieve that is:: + + to_bytes(to_unicode(text), encoding='utf-8') + + The initial :func:`to_unicode` call will ensure text is + a :class:`unicode` string. Then, :func:`to_bytes` will turn that into + a byte :class:`str` with the specified encoding. + + Usually, this should be used on a :class:`unicode` string but it can take + either a byte :class:`str` or a :class:`unicode` string intelligently. 
+ Nonstring objects are handled in different ways depending on the setting + of the :attr:`nonstring` parameter. + + The default values of this function are set so as to always return a byte + :class:`str` and never raise an error when converting from unicode to + bytes. However, when you do not pass an encoding that can validly encode + the object (or a non-string object), you may end up with output that you + don't expect. Be sure you understand the requirements of your data, not + just ignore errors by passing it through this function. + ''' + # Could use isbasestring, isbytestring here but we want this to be as fast + # as possible + if isinstance(obj, basestring): + if isinstance(obj, binary_type): + return obj + return obj.encode(encoding, errors) + if not nonstring: + nonstring = 'simplerepr' + + if nonstring == 'empty': + return b'' + elif nonstring == 'passthru': + return obj + elif nonstring == 'simplerepr': + try: + simple = binary_type(obj) + except UnicodeError: + try: + simple = obj.__str__() + except (AttributeError, UnicodeError): + simple = None + if not simple: + try: + simple = obj.__unicode__() + except (AttributeError, UnicodeError): + simple = b'' + if isinstance(simple, text_type): + simple = simple.encode(encoding, 'replace') + return simple + elif nonstring in ('repr', 'strict'): + try: + obj_repr = obj.__repr__() + except (AttributeError, UnicodeError): + obj_repr = b'' + if isinstance(obj_repr, text_type): + obj_repr = obj_repr.encode(encoding, errors) + else: + obj_repr = binary_type(obj_repr) + if nonstring == 'repr': + return obj_repr + raise TypeError('to_bytes was given "%(obj)s" which is neither' + ' a unicode string or a byte string (str)' % {'obj': obj_repr}) + + raise TypeError('nonstring value, %(param)s, is not set to a valid' + ' action' % {'param': nonstring}) + + +# force the return value of a function to be unicode. Use with partial to +# ensure that a filter will return unicode values. 
+def unicode_wrap(func, *args, **kwargs): + return to_unicode(func(*args, **kwargs), nonstring='passthru')