Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Include/codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
PyAPI_DATA(const char *) Py_hexdigits;
#endif

#ifndef Py_LIMITED_API
PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
Comment thread
serhiy-storchaka marked this conversation as resolved.
Outdated
const char *encoding,
const char *alternate_command);
#endif

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion Include/internal/pycore_codecs.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ extern int _PyCodec_UnregisterError(const char *name);
in Python 3.5+?

*/
extern PyObject* _PyCodec_LookupTextEncoding(
PyAPI_FUNC(PyObject*) _PyCodec_LookupTextEncoding(
const char *encoding,
const char *alternate_command);

Expand Down
5 changes: 4 additions & 1 deletion Lib/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ class CodecInfo(tuple):

def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
incrementalencoder=None, incrementaldecoder=None, name=None,
*, _is_text_encoding=None):
*, _is_text_encoding=None,
_is_single_byte=None):
self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
self.name = name
self.encode = encode
Expand All @@ -104,6 +105,8 @@ def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
self.streamreader = streamreader
if _is_text_encoding is not None:
self._is_text_encoding = _is_text_encoding
if _is_single_byte is not None:
self._is_single_byte = _is_single_byte
return self

def __repr__(self):
Expand Down
1 change: 1 addition & 0 deletions Lib/encodings/big5.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/big5hkscs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/cp932.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/cp949.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/cp950.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/euc_jis_2004.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/euc_jisx0213.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/euc_jp.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/euc_kr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/gb18030.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/gb2312.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/gbk.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/hz.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/idna.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,4 +385,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp_2004.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_jp_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/iso2022_kr.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/johab.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/punycode.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,4 +250,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/raw_unicode_escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/shift_jis.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/shift_jis_2004.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/shift_jisx0213.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/unicode_escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamwriter=StreamWriter,
streamreader=StreamReader,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_16.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_16_be.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_16_le.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_32.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,4 +147,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_32_be.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_32_le.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_7.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_8.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
1 change: 1 addition & 0 deletions Lib/encodings/utf_8_sig.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,5 @@ def getregentry():
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
_is_single_byte=False,
)
3 changes: 3 additions & 0 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,6 +1892,7 @@ def test_copy(self):
self.assertIsNot(dup, orig)
self.assertEqual(dup, orig)
self.assertTrue(orig._is_text_encoding)
self.assertFalse(orig._is_single_byte)
self.assertEqual(dup.encode, orig.encode)
self.assertEqual(dup.name, orig.name)
self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
Expand All @@ -1912,6 +1913,7 @@ def test_deepcopy(self):
self.assertIsNot(dup, orig)
self.assertEqual(dup, orig)
self.assertTrue(orig._is_text_encoding)
self.assertFalse(orig._is_single_byte)
self.assertEqual(dup.encode, orig.encode)
self.assertEqual(dup.name, orig.name)
self.assertEqual(dup.incrementalencoder, orig.incrementalencoder)
Expand Down Expand Up @@ -1940,6 +1942,7 @@ def test_pickle(self):
unpickled_codec_info.incrementalencoder
)
self.assertTrue(unpickled_codec_info._is_text_encoding)
self.assertFalse(unpickled_codec_info._is_single_byte)

# Test a CodecInfo with _is_text_encoding equal to false.
codec_info = codecs.lookup('base64')
Expand Down
47 changes: 46 additions & 1 deletion Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def _verify_parse_output(self, operations):
"Character data: '\xb5'",
"End element: 'root'",
]
for operation, expected_operation in zip(operations, expected_operations):
for operation, expected_operation in zip(operations, expected_operations, strict=True):
self.assertEqual(operation, expected_operation)

def test_parse_bytes(self):
Expand Down Expand Up @@ -276,6 +276,51 @@ def test_parse_again(self):
self.assertEqual(expat.ErrorString(cm.exception.code),
expat.errors.XML_ERROR_FINISHED)

@support.subTests('enc', ['UTF-8', 'utf-8', 'utf-16', 'koi8-u',
                          'cp1125', 'cp1251', 'iso8859-5',
                          'mac_cyrillic'])
def test_supportes_ecodings(self, enc):
    # Parse a small document whose element name, attribute and text are
    # Cyrillic, encoded with *enc* and declaring *enc* in its XML
    # declaration, then compare the Outputter's ``out`` list against the
    # expected entries.
    # NOTE(review): the method name looks like a misspelling of
    # "test_supported_encodings"; it is left unchanged here.
    recorder = self.Outputter()
    xml_parser = expat.ParserCreate()
    self._hookup_callbacks(xml_parser, recorder)

    declaration = f'<?xml version="1.0" encoding="{enc}"?>\n'
    body = '<корінь атрибут="значення">зміст</корінь>'
    payload = (declaration + body).encode(enc)

    # Second argument True marks this chunk as the final one.
    xml_parser.Parse(payload, True)

    expected = [
        ('XML declaration', ('1.0', enc, -1)),
        "Start element: 'корінь' {'атрибут': 'значення'}",
        "Character data: 'зміст'",
        "End element: 'корінь'",
    ]
    self.assertEqual(recorder.out, expected)

@support.subTests('enc', [
    'UTF8', 'UTF-7',
    "unicode-escape", "raw-unicode-escape",
    "Big5-HKSCS", "Big5",
    "cp932", "cp949", "cp950",
    "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR",
    "GB18030", "GB2312", "GBK",
    "HZ-GB-2312",
    "ISO-2022-JP", "ISO-2022-JP-1", "ISO-2022-JP-2004",
    "ISO-2022-JP-2", "ISO-2022-JP-3", "ISO-2022-JP-EXT",
    "ISO-2022-KR",
    "johab",
    "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213",
])
def test_unsupportes_ecodings(self, enc):
    # A document that declares (and is encoded with) one of the listed
    # encodings must make Parse() raise ValueError.
    # NOTE(review): presumably these are the codecs flagged with
    # _is_single_byte=False -- confirm against the C implementation.
    # NOTE(review): the method name looks like a misspelling of
    # "test_unsupported_encodings"; it is left unchanged here.
    xml_parser = expat.ParserCreate()

    declaration = f'<?xml version="1.0" encoding="{enc}"?>\n'
    payload = (declaration + '<root></root>').encode(enc)

    with self.assertRaises(ValueError):
        xml_parser.Parse(payload, True)

def test_unknown_ecoding(self):
    # A document declaring the encoding name "xyz" must make Parse()
    # raise LookupError.
    # NOTE(review): the method name looks like a misspelling of
    # "test_unknown_encoding"; it is left unchanged here.
    xml_parser = expat.ParserCreate()
    payload = b'<?xml version="1.0" encoding="xyz"?>\n<root></root>'
    with self.assertRaises(LookupError):
        # True marks the chunk as final, so the whole document is parsed.
        xml_parser.Parse(payload, True)


class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
# Tests that make sure we get errors when the namespace_separator value
Expand Down
Loading
Loading