On Dec 18, 4:43 pm, Dave Angel <da...@ieee.org> wrote: > Oscar Del Ben wrote: > > So I'm trying to send a file through webpy and urllib2 but I can't get > > around these UnicodeErrors. Here's the code: > > > # controller > > > x = web.input(video_original={}) > > params = {'foo': x['foo']} > > > files = (('video[original]', 'test', x['video_original'].file.read > > ()),) > > client.upload(upload_url, params, files, access_token()) > > > # client library > > > def __encodeMultipart(self, fields, files): > > """ > > fields is a sequence of (name, value) elements for regular > > form fields. > > files is a sequence of (name, filename, value) elements for > > data to be uploaded as files > > Return (content_type, body) ready for httplib.HTTP instance > > """ > > boundary = mimetools.choose_boundary() > > crlf = '\r\n' > > > l = [] > > for k, v in fields.iteritems(): > > l.append('--' + boundary) > > l.append('Content-Disposition: form-data; name="%s"' % k) > > l.append('') > > l.append(v) > > for (k, f, v) in files: > > l.append('--' + boundary) > > l.append('Content-Disposition: form-data; name="%s"; > > filename="%s"' % (k, f)) > > l.append('Content-Type: %s' % self.__getContentType(f)) > > l.append('') > > l.append(v) > > l.append('--' + boundary + '--') > > l.append('') > > body = crlf.join(l) > > > return boundary, body > > > def __getContentType(self, filename): > > return mimetypes.guess_type(filename)[0] or 'application/octet- > > stream' > > > def upload(self, path, post_params, files, token=None): > > > if token: > > token = oauth.OAuthToken.from_string(token) > > > url = "http://%s%s" % (self.authority, path) > > > (boundary, body) = self.__encodeMultipart(post_params, files) > > > headers = {'Content-Type': 'multipart/form-data; boundary=%s' % > > boundary, > > 'Content-Length': str(len(body)) > > } > > > request = oauth.OAuthRequest.from_consumer_and_token( > > self.consumer, > > token, > > http_method='POST', > > http_url=url, > > parameters=post_params > 
> ) > > > request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(), > > self.consumer, token) > > > request = urllib2.Request(request.http_url, postdata=body, > > headers=headers) > > request.get_method = lambda: 'POST' > > > return urllib2.urlopen(request) > > > Unfortunately I get two kinds of unicode error, the first one in the > > crlf.join(l): > > > Traceback (most recent call last): > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 242, in process > > return self.handle() > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 233, in handle > > return self._delegate(fn, self.fvars, args) > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 412, in _delegate > > return handle_class(cls) > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 387, in handle_class > > return tocall(*args) > > File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in > > POST > > return simplejson.load(client.upload(upload_url, params, files, > > access_token())) > > File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line > > 131, in upload > > (boundary, body) = self.__encodeMultipart(post_params, files) > > File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line > > 111, in __encodeMultipart > > body = crlf.join(l) > > UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position > > 42: ordinal not in range(128) > > > And here's another one: > > > Traceback (most recent call last): > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 242, in process > > return self.handle() > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 233, in handle > > return self._delegate(fn, self.fvars, args) > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 412, in _delegate > > return handle_class(cls) > > File "/Users/oscar/projects/work/whitelabel/web/application.py", > > line 387, 
in handle_class > > return tocall(*args) > > File "/Users/oscar/projects/work/whitelabel/code.py", line 328, in > > POST > > return simplejson.load(client.upload(upload_url, params, files, > > access_token())) > > File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line > > 131, in upload > > (boundary, body) = self.__encodeMultipart(post_params, files) > > File "/Users/oscar/projects/work/whitelabel/oauth_client.py", line > > 111, in __encodeMultipart > > body = crlf.join(l) > > UnicodeDecodeError: 'ascii' codec can't decode byte 0xb7 in position > > 42: ordinal not in range(128) > > > Does anyone know why these errors happen and what I should do to > > prevent them? Many thanks. > > > Oscar > > I did a short test to demonstrate the likely problem, without all the > other libraries and complexity. > > lst = ["abc"] > lst.append("def") > lst.append(u"abc") > lst.append("g\x48\x82\x94i") > print lst > print "**".join(lst) > > That fragment of code generates (in Python 2.6) the following output and > traceback: > > ['abc', 'def', u'abc', 'gH\x82\x94i'] > Traceback (most recent call last): > File "M:\Programming\Python\sources\dummy\stuff2.py", line 10, in <module> > print "**".join(lst) > UnicodeDecodeError: 'ascii' codec can't decode byte 0x82 in position 2: > ordinal not in range(128) > > You'll notice that one of the strings is a unicode one, and another one > has the character 0x82 in it. Once join() discovers Unicode, it needs > to produce a Unicode string, and by default, it uses the ASCII codec to > get it. > > If you print your 'l' list (bad name, by the way, looks too much like a > '1'), you can see which element is Unicode, and which one has the \xb7 > in position 42. You'll have to decide which is the problem, and solve > it accordingly. Was the fact that one of the strings is unicode an > oversight? Or did you think that all characters would be 0x7f or less? > Or do you want to handle all possible characters, and if so, with what > encoding? 
> > DaveA
Thanks for your reply, DaveA. Since I'm dealing with file uploads, I guess I should only care about those. I understand that I'm trying to concatenate a unicode string with binary data, but I don't know how to deal with this. Perhaps the uploaded file should be encoded in some way? I don't think this is the case, though. -- http://mail.python.org/mailman/listinfo/python-list