Something like this in the docstring? "In order to support the historical
JSON specification and closed-ecosystem JSON, it is possible to specify an
encoding other than UTF-8."
From RFC 8259, Section 8.1. Character Encoding:
> JSON text exchanged between systems that are not part of a closed
> ecosystem MUST be encoded using UTF-8 [RFC3629].
> Previous specifications of JSON have not required the use of UTF-8
> when transmitting JSON text. However, the vast majority of JSON-
> based software implementations have chosen to use the UTF-8 encoding,
> to the extent that it is the only encoding that achieves
> interoperability.
> Implementations MUST NOT add a byte order mark (U+FEFF) to the
> beginning of a networked-transmitted JSON text. In the interests of
> interoperability, implementations that parse JSON texts MAY ignore
> the presence of a byte order mark rather than treating it as an
> error.
```python
import json
import os
def dumpf(obj, path, *, encoding="UTF-8", **kwargs):
    """Serialize ``obj`` as JSON text into the file at ``path``.

    ``path`` is converted with ``os.fspath`` and opened in text write mode
    using ``encoding`` (UTF-8 unless the caller says otherwise; ``None`` is
    handed to ``open`` as-is). All remaining keyword arguments are forwarded
    to ``json.dump`` untouched, and its return value is returned.
    """
    filename = os.fspath(path)
    with open(filename, mode="w", encoding=encoding) as stream:
        result = json.dump(obj, stream, **kwargs)
    return result
def loadf(path, *, encoding="UTF-8", **kwargs):
    """Parse the JSON document stored in the file at ``path``.

    ``path`` is converted with ``os.fspath`` and opened in text read mode
    using ``encoding`` (UTF-8 unless the caller says otherwise; ``None`` is
    handed to ``open`` as-is). All remaining keyword arguments are forwarded
    to ``json.load`` untouched, and the decoded object is returned.
    """
    filename = os.fspath(path)
    with open(filename, mode="r", encoding=encoding) as stream:
        decoded = json.load(stream, **kwargs)
    return decoded
import pathlib
import unittest
class TestJsonLoadfAndDumpf(unittest.TestCase):
    """Round-trip tests for ``dumpf``/``loadf`` across several encodings."""

    def setUp(self):
        """Prepare the sample object, the encodings to try, and a scratch path.

        The base path lives in a fresh temporary directory that is removed
        when the test finishes, so the per-encoding files written by the
        test never accumulate in the current working directory.
        """
        import tempfile  # local import: only this fixture needs it

        self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]

        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)

        self.data = dict(
            obj=dict(a=dict(b=[1, 2, 3])),
            path=pathlib.Path(scratch.name) / "test_loadf_and_dumpf.json",
        )

    def test_dumpf_and_loadf(self):
        """Whatever ``dumpf`` writes, ``loadf`` must read back as an equal object."""
        data = self.data
        for encoding in self.encodings:
            # subTest reports each failing encoding separately instead of
            # stopping at the first one.
            with self.subTest(encoding=encoding):
                path = f'{data["path"]}.{encoding}.json'
                dumpf(data["obj"], path, encoding=encoding)
                loadf_output = loadf(path, encoding=encoding)
                # assertEqual (unlike a bare assert) survives ``python -O``
                # and prints both values on mismatch.
                self.assertEqual(loadf_output, data["obj"])
# $ pip install pytest-cov
# $ pytest -v example.py
# https://docs.pytest.org/en/stable/parametrize.html
# https://docs.pytest.org/en/stable/tmpdir.html
import pytest
@pytest.mark.parametrize("encoding", [None, "UTF-8", "UTF-16", "UTF-32"])
@pytest.mark.parametrize("obj", [{"a": {"b": [1, 2, 3]}}])
def test_dumpf_and_loadf(obj, encoding, tmpdir):
    """Round-trip ``obj`` through ``dumpf`` and ``loadf`` using ``encoding``.

    The file is written inside pytest's per-test ``tmpdir``; the encoding is
    embedded in the file name.
    """
    filename = f"test_loadf_and_dumpf.{encoding}.json"
    target = pathlib.Path(tmpdir).joinpath(filename)
    dumpf(obj, target, encoding=encoding)
    roundtripped = loadf(target, encoding=encoding)
    assert roundtripped == obj
```
For whoever creates a PR for this:
- [ ] add parameter and return type annotations
- [ ] copy docstrings from json.load/json.dump and open#encoding
- [ ] correctly support the C module implementation? (This just does
`import json`.)
- [ ] keep or drop the encoding tests?
On Thu, Sep 17, 2020 at 1:25 AM Christopher Barker <[email protected]>
wrote:
> Is that suggested code? I don't follow.
>
> But if it is, no. personally, I think ANY use of system settings is a bad
> idea [*]. But certainly no need to even think about it for JSON.
>
> -CHB
>
> * have we not learned that in the age of the internet the machine the code
> happens to be running on has nothing to do with the user of the
> applications' needs? Timezones, encodings, number formats, NOTHING.
>
>
> On Wed, Sep 16, 2020 at 8:45 PM Wes Turner <[email protected]> wrote:
>
>> Is all of this locale/encoding testing necessary (or even sufficient)?
>>
>>
>> ```python
>> import json
>> import locale
>> import os
>>
>>
>> def get_default_encoding():
>> """
>> TODO XXX: ???
>> """
>> default_encoding = locale.getdefaultlocale()[1]
>> if default_encoding.startswith("UTF-"):
>> return default_encoding
>> else:
>> return "UTF-8"
>>
>>
>> def dumpf(obj, path, *args, **kwargs):
>> with open(
>> os.fspath(path),
>> "w",
>> encoding=kwargs.pop("encoding", get_default_encoding()),
>> ) as file_:
>> return json.dump(obj, file_, *args, **kwargs)
>>
>>
>> def loadf(path, *args, **kwargs):
>> with open(
>> os.fspath(path),
>> "r",
>> encoding=kwargs.pop("encoding", get_default_encoding()),
>> ) as file_:
>> return json.load(file_, *args, **kwargs)
>>
>>
>> import pathlib
>> import unittest
>>
>>
>> class TestJsonLoadfAndDumpf(unittest.TestCase):
>> def setUp(self):
>> self.locales = ["", "C", "en_US.UTF-8", "japanese"]
>> self.encodings = [None, "UTF-8", "UTF-16", "UTF-32"]
>>
>> data = dict(
>> obj=dict(a=dict(b=[1, 2, 3])),
>> encoding=None,
>> path=pathlib.Path(".") / "test_loadf_and_dumpf.json",
>> )
>> if os.path.isfile(data["path"]):
>> os.unlink(data["path"])
>> self.data = data
>>
>> self.previous_locale = locale.getlocale()
>>
>> def tearDown(self):
>> locale.setlocale(locale.LC_ALL, self.previous_locale)
>>
>> def test_get_default_encoding(self):
>> for localestr in self.locales:
>> locale.setlocale(locale.LC_ALL, localestr)
>> output = get_default_encoding()
>> assert output.startswith("UTF-")
>>
>> def test_dumpf_and_loadf(self):
>> data = self.data
>> for localestr in self.locales:
>> locale.setlocale(locale.LC_ALL, localestr)
>> for encoding in self.encodings:
>> dumpf_output = dumpf(
>> data["obj"], data["path"], encoding=encoding
>> )
>> loadf_output = loadf(data["path"], encoding=encoding)
>> assert loadf_output == data["obj"]
>> ```
>>
>> On Wed, Sep 16, 2020 at 8:30 PM Christopher Barker <[email protected]>
>> wrote:
>>
>>> On Wed, Sep 16, 2020 at 2:53 PM Wes Turner <[email protected]> wrote:
>>>
>>>> So I was not correct: dump does not default to UTF-8 (and does not
>>>> accept an encoding= parameter)
>>>>
>>>>
>>>>> I think dumpf() should use UTF-8, and that's it. If anyone really
>>>>> wants something else, they can get it by providing an open text file
>>>>> object.
>>>>>
>>>>
>>>> Why would we impose UTF-8 when the spec says UTF-8, UTF-16, or UTF-32?
>>>>
>>>
>>> The idea was that the encoding was one of the motivators to doing this
>>> in the first place. But I suppose as long as utf-8 is the default, and only
>>> the three "official" ones are allowed, then yeah, we could add an encoding
>>> keyword argument.
>>>
>>> -CHB
>>>
>>>
>>> --
>>> Christopher Barker, PhD
>>>
>>> Python Language Consulting
>>> - Teaching
>>> - Scientific Software Development
>>> - Desktop GUI and Web Development
>>> - wxPython, numpy, scipy, Cython
>>>
>>
>
> --
> Christopher Barker, PhD
>
> Python Language Consulting
> - Teaching
> - Scientific Software Development
> - Desktop GUI and Web Development
> - wxPython, numpy, scipy, Cython
>
_______________________________________________
Python-ideas mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3/lists/python-ideas.python.org/
Message archived at
https://mail.python.org/archives/list/[email protected]/message/I7FFASD4TSM7JXY3RXD3GOB4WCYOZE4N/
Code of Conduct: http://python.org/psf/codeofconduct/