mirror of
https://github.com/python/cpython.git
synced 2026-01-26 12:55:08 +00:00
gh-139871: Optimize bytearray construction with encoding (#142243)
When a `str` is encoded in `bytearray.__init__`, the encoder usually
creates a new, uniquely referenced bytes object. Rather than allocating new
memory and copying the bytes, use the already created bytes object as the
bytearray's backing buffer. The bigger the `str`, the bigger the saving.
Mean +- std dev: [main_encoding] 497 us +- 9 us -> [encoding] 14.2 us +- 0.3 us: 34.97x faster
```python
# Micro-benchmark: time constructing a bytearray from a str with an explicit encoding.
import pyperf
runner = pyperf.Runner()
runner.timeit(
# Benchmark label used in the reported results.
name="encode",
# Setup builds the 1,000,000-character ASCII input string `a`.
setup="a = 'a' * 1_000_000",
# Timed statement: encode `a` as UTF-8 while constructing the bytearray.
stmt="bytearray(a, encoding='utf8')")
```
This commit is contained in:
parent
850f95f6f6
commit
14e6052b43
@ -914,6 +914,10 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Should be caused by first init or the resize to 0. */
|
||||
assert(self->ob_bytes_object == Py_GetConstantBorrowed(Py_CONSTANT_EMPTY_BYTES));
|
||||
assert(self->ob_exports == 0);
|
||||
|
||||
/* Make a quick exit if no first argument */
|
||||
if (arg == NULL) {
|
||||
if (encoding != NULL || errors != NULL) {
|
||||
@ -935,9 +939,20 @@ bytearray___init___impl(PyByteArrayObject *self, PyObject *arg,
|
||||
return -1;
|
||||
}
|
||||
encoded = PyUnicode_AsEncodedString(arg, encoding, errors);
|
||||
if (encoded == NULL)
|
||||
if (encoded == NULL) {
|
||||
return -1;
|
||||
}
|
||||
assert(PyBytes_Check(encoded));
|
||||
|
||||
/* Most encodes return a new unique bytes, just use it as buffer. */
|
||||
if (_PyObject_IsUniquelyReferenced(encoded)
|
||||
&& PyBytes_CheckExact(encoded))
|
||||
{
|
||||
Py_ssize_t size = Py_SIZE(encoded);
|
||||
self->ob_bytes_object = encoded;
|
||||
bytearray_reinit_from_bytes(self, size, size);
|
||||
return 0;
|
||||
}
|
||||
new = bytearray_iconcat((PyObject*)self, encoded);
|
||||
Py_DECREF(encoded);
|
||||
if (new == NULL)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user