blob: b74e57ad913b11109f32f7a183e932836a784696 [file] [log] [blame]
/* bytes to hex implementation */
2
3#include "Python.h"
4
Benjamin Petersone5024512018-09-12 12:06:42 -07005#include "pystrhex.h"
6
Gregory P. Smithe3f63932015-04-26 00:41:00 +00007static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07008 const PyObject* sep, int bytes_per_sep_group,
9 const int return_bytes)
Gregory P. Smithe3f63932015-04-26 00:41:00 +000010{
Victor Stinner455df972020-04-15 14:05:24 +020011 assert(arglen >= 0);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070012
Victor Stinner455df972020-04-15 14:05:24 +020013 Py_UCS1 sep_char = 0;
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070014 if (sep) {
Pablo Galindo938d9a02019-06-01 21:02:08 +010015 Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070016 if (seplen < 0) {
17 return NULL;
18 }
19 if (seplen != 1) {
20 PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
21 return NULL;
22 }
23 if (PyUnicode_Check(sep)) {
24 if (PyUnicode_READY(sep))
25 return NULL;
26 if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
27 PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
28 return NULL;
29 }
30 sep_char = PyUnicode_READ_CHAR(sep, 0);
Victor Stinner455df972020-04-15 14:05:24 +020031 }
32 else if (PyBytes_Check(sep)) {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070033 sep_char = PyBytes_AS_STRING(sep)[0];
Victor Stinner455df972020-04-15 14:05:24 +020034 }
35 else {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070036 PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
37 return NULL;
38 }
39 if (sep_char > 127 && !return_bytes) {
40 PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
41 return NULL;
42 }
Victor Stinner455df972020-04-15 14:05:24 +020043 }
44 else {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070045 bytes_per_sep_group = 0;
46 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +000047
Victor Stinner455df972020-04-15 14:05:24 +020048 unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group);
49 Py_ssize_t resultlen = 0;
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070050 if (bytes_per_sep_group && arglen > 0) {
51 /* How many sep characters we'll be inserting. */
52 resultlen = (arglen - 1) / abs_bytes_per_sep;
53 }
54 /* Bounds checking for our Py_ssize_t indices. */
55 if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
Gregory P. Smithe3f63932015-04-26 00:41:00 +000056 return PyErr_NoMemory();
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070057 }
58 resultlen += arglen * 2;
59
Raymond Hettinger0138c4c2019-08-27 09:55:13 -070060 if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070061 bytes_per_sep_group = 0;
62 abs_bytes_per_sep = 0;
63 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +000064
Victor Stinner455df972020-04-15 14:05:24 +020065 PyObject *retval;
66 Py_UCS1 *retbuf;
Gregory P. Smithe3f63932015-04-26 00:41:00 +000067 if (return_bytes) {
68 /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
Victor Stinner455df972020-04-15 14:05:24 +020069 retval = PyBytes_FromStringAndSize(NULL, resultlen);
70 if (!retval) {
Serhiy Storchaka598ceae2017-11-28 17:56:10 +020071 return NULL;
Victor Stinner455df972020-04-15 14:05:24 +020072 }
73 retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
74 }
75 else {
76 retval = PyUnicode_New(resultlen, 127);
77 if (!retval) {
78 return NULL;
79 }
Serhiy Storchaka598ceae2017-11-28 17:56:10 +020080 retbuf = PyUnicode_1BYTE_DATA(retval);
Gregory P. Smithe3f63932015-04-26 00:41:00 +000081 }
82
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070083 /* Hexlify */
Victor Stinner455df972020-04-15 14:05:24 +020084 Py_ssize_t i, j;
sweeneyde6a9e80a2020-04-20 20:17:52 -040085 unsigned char c;
86
87 if (bytes_per_sep_group == 0) {
88 for (i = j = 0; i < arglen; ++i) {
89 assert((j + 1) < resultlen);
90 c = argbuf[i];
91 retbuf[j++] = Py_hexdigits[c >> 4];
92 retbuf[j++] = Py_hexdigits[c & 0x0f];
93 }
94 assert(j == resultlen);
95 }
96 else {
97 /* The number of complete chunk+sep periods */
98 Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
99 Py_ssize_t chunk;
100 unsigned int k;
101
102 if (bytes_per_sep_group < 0) {
103 i = j = 0;
104 for (chunk = 0; chunk < chunks; chunk++) {
105 for (k = 0; k < abs_bytes_per_sep; k++) {
106 c = argbuf[i++];
107 retbuf[j++] = Py_hexdigits[c >> 4];
108 retbuf[j++] = Py_hexdigits[c & 0x0f];
109 }
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700110 retbuf[j++] = sep_char;
111 }
sweeneyde6a9e80a2020-04-20 20:17:52 -0400112 while (i < arglen) {
113 c = argbuf[i++];
114 retbuf[j++] = Py_hexdigits[c >> 4];
115 retbuf[j++] = Py_hexdigits[c & 0x0f];
116 }
117 assert(j == resultlen);
118 }
119 else {
120 i = arglen - 1;
121 j = resultlen - 1;
122 for (chunk = 0; chunk < chunks; chunk++) {
123 for (k = 0; k < abs_bytes_per_sep; k++) {
124 c = argbuf[i--];
125 retbuf[j--] = Py_hexdigits[c & 0x0f];
126 retbuf[j--] = Py_hexdigits[c >> 4];
127 }
128 retbuf[j--] = sep_char;
129 }
130 while (i >= 0) {
131 c = argbuf[i--];
132 retbuf[j--] = Py_hexdigits[c & 0x0f];
133 retbuf[j--] = Py_hexdigits[c >> 4];
134 }
135 assert(j == -1);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700136 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000137 }
138
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000139#ifdef Py_DEBUG
Victor Stinner455df972020-04-15 14:05:24 +0200140 if (!return_bytes) {
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000141 assert(_PyUnicode_CheckConsistency(retval, 1));
142 }
143#endif
144
145 return retval;
146}
147
Benjamin Petersone5024512018-09-12 12:06:42 -0700148PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000149{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700150 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000151}
152
153/* Same as above but returns a bytes() instead of str() to avoid the
154 * need to decode the str() when bytes are needed. */
Benjamin Petersone5024512018-09-12 12:06:42 -0700155PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000156{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700157 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
158}
159
/* These variants include support for a separator between every N bytes: */
161
162PyObject * _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
163{
164 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
165}
166
167/* Same as above but returns a bytes() instead of str() to avoid the
168 * need to decode the str() when bytes are needed. */
169PyObject * _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
170{
171 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000172}