blob: 7e4fad303200eb43f7c28f3d88cd94fde9494159 [file] [log] [blame]
/* bytes to hex implementation */
2
3#include "Python.h"
4
Benjamin Petersone5024512018-09-12 12:06:42 -07005#include "pystrhex.h"
6
Gregory P. Smithe3f63932015-04-26 00:41:00 +00007static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
Gregory P. Smith0c2f9302019-05-29 11:46:58 -07008 const PyObject* sep, int bytes_per_sep_group,
9 const int return_bytes)
Gregory P. Smithe3f63932015-04-26 00:41:00 +000010{
Victor Stinner455df972020-04-15 14:05:24 +020011 assert(arglen >= 0);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070012
Victor Stinner455df972020-04-15 14:05:24 +020013 Py_UCS1 sep_char = 0;
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070014 if (sep) {
Pablo Galindo938d9a02019-06-01 21:02:08 +010015 Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070016 if (seplen < 0) {
17 return NULL;
18 }
19 if (seplen != 1) {
20 PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
21 return NULL;
22 }
23 if (PyUnicode_Check(sep)) {
24 if (PyUnicode_READY(sep))
25 return NULL;
26 if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
27 PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
28 return NULL;
29 }
30 sep_char = PyUnicode_READ_CHAR(sep, 0);
Victor Stinner455df972020-04-15 14:05:24 +020031 }
32 else if (PyBytes_Check(sep)) {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070033 sep_char = PyBytes_AS_STRING(sep)[0];
Victor Stinner455df972020-04-15 14:05:24 +020034 }
35 else {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070036 PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
37 return NULL;
38 }
39 if (sep_char > 127 && !return_bytes) {
40 PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
41 return NULL;
42 }
Victor Stinner455df972020-04-15 14:05:24 +020043 }
44 else {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070045 bytes_per_sep_group = 0;
46 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +000047
Victor Stinner455df972020-04-15 14:05:24 +020048 unsigned int abs_bytes_per_sep = abs(bytes_per_sep_group);
49 Py_ssize_t resultlen = 0;
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070050 if (bytes_per_sep_group && arglen > 0) {
51 /* How many sep characters we'll be inserting. */
52 resultlen = (arglen - 1) / abs_bytes_per_sep;
53 }
54 /* Bounds checking for our Py_ssize_t indices. */
55 if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
Gregory P. Smithe3f63932015-04-26 00:41:00 +000056 return PyErr_NoMemory();
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070057 }
58 resultlen += arglen * 2;
59
Raymond Hettinger0138c4c2019-08-27 09:55:13 -070060 if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070061 bytes_per_sep_group = 0;
62 abs_bytes_per_sep = 0;
63 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +000064
Victor Stinner455df972020-04-15 14:05:24 +020065 PyObject *retval;
66 Py_UCS1 *retbuf;
Gregory P. Smithe3f63932015-04-26 00:41:00 +000067 if (return_bytes) {
68 /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
Victor Stinner455df972020-04-15 14:05:24 +020069 retval = PyBytes_FromStringAndSize(NULL, resultlen);
70 if (!retval) {
Serhiy Storchaka598ceae2017-11-28 17:56:10 +020071 return NULL;
Victor Stinner455df972020-04-15 14:05:24 +020072 }
73 retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
74 }
75 else {
76 retval = PyUnicode_New(resultlen, 127);
77 if (!retval) {
78 return NULL;
79 }
Serhiy Storchaka598ceae2017-11-28 17:56:10 +020080 retbuf = PyUnicode_1BYTE_DATA(retval);
Gregory P. Smithe3f63932015-04-26 00:41:00 +000081 }
82
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070083 /* Hexlify */
Victor Stinner455df972020-04-15 14:05:24 +020084 Py_ssize_t i, j;
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070085 for (i=j=0; i < arglen; ++i) {
Victor Stinner455df972020-04-15 14:05:24 +020086 assert((j + 1) < resultlen);
Gregory P. Smithe3f63932015-04-26 00:41:00 +000087 unsigned char c;
Victor Stinner455df972020-04-15 14:05:24 +020088 c = (argbuf[i] >> 4) & 0x0f;
Gregory P. Smithe3f63932015-04-26 00:41:00 +000089 retbuf[j++] = Py_hexdigits[c];
Victor Stinner455df972020-04-15 14:05:24 +020090 c = argbuf[i] & 0x0f;
Gregory P. Smithe3f63932015-04-26 00:41:00 +000091 retbuf[j++] = Py_hexdigits[c];
Gregory P. Smith0c2f9302019-05-29 11:46:58 -070092 if (bytes_per_sep_group && i < arglen - 1) {
93 Py_ssize_t anchor;
94 anchor = (bytes_per_sep_group > 0) ? (arglen - 1 - i) : (i + 1);
95 if (anchor % abs_bytes_per_sep == 0) {
96 retbuf[j++] = sep_char;
97 }
98 }
Gregory P. Smithe3f63932015-04-26 00:41:00 +000099 }
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700100 assert(j == resultlen);
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000101
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000102#ifdef Py_DEBUG
Victor Stinner455df972020-04-15 14:05:24 +0200103 if (!return_bytes) {
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000104 assert(_PyUnicode_CheckConsistency(retval, 1));
105 }
106#endif
107
108 return retval;
109}
110
Benjamin Petersone5024512018-09-12 12:06:42 -0700111PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000112{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700113 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000114}
115
116/* Same as above but returns a bytes() instead of str() to avoid the
117 * need to decode the str() when bytes are needed. */
Benjamin Petersone5024512018-09-12 12:06:42 -0700118PyObject * _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000119{
Gregory P. Smith0c2f9302019-05-29 11:46:58 -0700120 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
121}
122
/* These variants include support for a separator between every N bytes: */
124
125PyObject * _Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
126{
127 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 0);
128}
129
130/* Same as above but returns a bytes() instead of str() to avoid the
131 * need to decode the str() when bytes are needed. */
132PyObject * _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen, const PyObject* sep, const int bytes_per_group)
133{
134 return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
Gregory P. Smithe3f63932015-04-26 00:41:00 +0000135}