blob: 7dd7b753cc8cd55bb79468f64cb163b48aee16e7 [file] [log] [blame]
Craig Tiller3bc8ebd2015-06-24 15:41:15 -07001/*
2 *
Craig Tiller6169d5f2016-03-31 07:46:18 -07003 * Copyright 2015, Google Inc.
Craig Tiller3bc8ebd2015-06-24 15:41:15 -07004 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * * Redistributions in binary form must reproduce the above
13 * copyright notice, this list of conditions and the following disclaimer
14 * in the documentation and/or other materials provided with the
15 * distribution.
16 * * Neither the name of Google Inc. nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *
32 */
33
Mark D. Roth2137cd82016-09-14 09:04:00 -070034#include "src/core/ext/client_channel/uri_parser.h"
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070035
36#include <string.h>
37
Craig Tiller0f310802016-10-26 16:25:56 -070038#include <grpc/slice.h>
39#include <grpc/slice_buffer.h>
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070040#include <grpc/support/alloc.h>
41#include <grpc/support/log.h>
yang-gb063c872015-10-07 11:40:13 -070042#include <grpc/support/port_platform.h>
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070043#include <grpc/support/string_util.h>
44
David Garcia Quintase3a49f82016-03-30 17:19:29 -070045#include "src/core/lib/support/string.h"
46
/** Sentinel for a size_t offset that has not been assigned yet: every bit is
 * set, i.e. the largest value a size_t can hold. */
#define NOT_SET (~(size_t)0)
49
Craig Tillera82950e2015-09-22 12:33:20 -070050static grpc_uri *bad_uri(const char *uri_text, size_t pos, const char *section,
51 int suppress_errors) {
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070052 char *line_prefix;
Craig Tiller3121fd42015-09-10 09:56:20 -070053 size_t pfx_len;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070054
Craig Tillera82950e2015-09-22 12:33:20 -070055 if (!suppress_errors) {
56 gpr_asprintf(&line_prefix, "bad uri.%s: '", section);
57 pfx_len = strlen(line_prefix) + pos;
58 gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text);
59 gpr_free(line_prefix);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070060
Craig Tillera82950e2015-09-22 12:33:20 -070061 line_prefix = gpr_malloc(pfx_len + 1);
62 memset(line_prefix, ' ', pfx_len);
63 line_prefix[pfx_len] = 0;
64 gpr_log(GPR_ERROR, "%s^ here", line_prefix);
65 gpr_free(line_prefix);
66 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070067
68 return NULL;
69}
70
David Garcia Quintas52678912015-09-07 11:28:58 -070071/** Returns a copy of \a src[begin, end) */
Craig Tillera82950e2015-09-22 12:33:20 -070072static char *copy_component(const char *src, size_t begin, size_t end) {
73 char *out = gpr_malloc(end - begin + 1);
74 memcpy(out, src + begin, end - begin);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070075 out[end - begin] = 0;
76 return out;
77}
78
David Garcia Quintas52678912015-09-07 11:28:58 -070079/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
80 * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
Craig Tillerf29a3882015-09-11 12:59:27 -070081 * sign not followed by two hex digits), NOT_SET is returned. */
Craig Tillera82950e2015-09-22 12:33:20 -070082static size_t parse_pchar(const char *uri_text, size_t i) {
David Garcia Quintas52678912015-09-07 11:28:58 -070083 /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
84 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
85 * pct-encoded = "%" HEXDIG HEXDIG
86 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
Craig Tiller45724b32015-09-22 10:42:19 -070087 / "*" / "+" / "," / ";" / "=" */
David Garcia Quintas52678912015-09-07 11:28:58 -070088 char c = uri_text[i];
Craig Tillera82950e2015-09-22 12:33:20 -070089 if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) ||
90 ((c >= '0') && (c <= '9')) ||
91 (c == '-' || c == '.' || c == '_' || c == '~') || /* unreserved */
92 (c == '!' || c == '$' || c == '&' || c == '\'' || c == '$' || c == '&' ||
93 c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' ||
94 c == '=') /* sub-delims */) {
95 return 1;
96 }
97 if (c == '%') { /* pct-encoded */
98 size_t j;
99 if (uri_text[i + 1] == 0 || uri_text[i + 2] == 0) {
100 return NOT_SET;
David Garcia Quintas52678912015-09-07 11:28:58 -0700101 }
Craig Tillera82950e2015-09-22 12:33:20 -0700102 for (j = i + 1; j < 2; j++) {
103 c = uri_text[j];
104 if (!(((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
105 ((c >= 'A') && (c <= 'F')))) {
106 return NOT_SET;
107 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700108 }
Craig Tillera82950e2015-09-22 12:33:20 -0700109 return 2;
110 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700111 return 0;
112}
113
114/* *( pchar / "?" / "/" ) */
Craig Tillera82950e2015-09-22 12:33:20 -0700115static int parse_fragment_or_query(const char *uri_text, size_t *i) {
David Garcia Quintas52678912015-09-07 11:28:58 -0700116 char c;
Craig Tillera82950e2015-09-22 12:33:20 -0700117 while ((c = uri_text[*i]) != 0) {
118 const size_t advance = parse_pchar(uri_text, *i); /* pchar */
119 switch (advance) {
120 case 0: /* uri_text[i] isn't in pchar */
121 /* maybe it's ? or / */
122 if (uri_text[*i] == '?' || uri_text[*i] == '/') {
123 (*i)++;
124 break;
125 } else {
126 return 1;
127 }
yang-gb063c872015-10-07 11:40:13 -0700128 GPR_UNREACHABLE_CODE(return 0);
Craig Tillera82950e2015-09-22 12:33:20 -0700129 default:
130 (*i) += advance;
131 break;
132 case NOT_SET: /* uri_text[i] introduces an invalid URI */
133 return 0;
David Garcia Quintas52678912015-09-07 11:28:58 -0700134 }
Craig Tillera82950e2015-09-22 12:33:20 -0700135 }
Craig Tillerf29a3882015-09-11 12:59:27 -0700136 /* *i is the first uri_text position past the \a query production, maybe \0 */
137 return 1;
David Garcia Quintas52678912015-09-07 11:28:58 -0700138}
139
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700140static void parse_query_parts(grpc_uri *uri) {
141 static const char *QUERY_PARTS_SEPARATOR = "&";
142 static const char *QUERY_PARTS_VALUE_SEPARATOR = "=";
143 GPR_ASSERT(uri->query != NULL);
144 if (uri->query[0] == '\0') {
145 uri->query_parts = NULL;
146 uri->query_parts_values = NULL;
147 uri->num_query_parts = 0;
148 return;
149 }
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700150
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800151 gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts,
152 &uri->num_query_parts);
153 uri->query_parts_values = gpr_malloc(uri->num_query_parts * sizeof(char **));
154 for (size_t i = 0; i < uri->num_query_parts; i++) {
155 char **query_param_parts;
156 size_t num_query_param_parts;
157 char *full = uri->query_parts[i];
158 gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts,
159 &num_query_param_parts);
160 GPR_ASSERT(num_query_param_parts > 0);
161 uri->query_parts[i] = query_param_parts[0];
162 if (num_query_param_parts > 1) {
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700163 /* TODO(dgq): only the first value after the separator is considered.
164 * Perhaps all chars after the first separator for the query part should
165 * be included, even if they include the separator. */
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800166 uri->query_parts_values[i] = query_param_parts[1];
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700167 } else {
168 uri->query_parts_values[i] = NULL;
169 }
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800170 for (size_t j = 2; j < num_query_param_parts; j++) {
171 gpr_free(query_param_parts[j]);
172 }
173 gpr_free(query_param_parts);
174 gpr_free(full);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700175 }
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700176}
177
Craig Tillera82950e2015-09-22 12:33:20 -0700178grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) {
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700179 grpc_uri *uri;
Craig Tiller3121fd42015-09-10 09:56:20 -0700180 size_t scheme_begin = 0;
Craig Tiller3121fd42015-09-10 09:56:20 -0700181 size_t scheme_end = NOT_SET;
182 size_t authority_begin = NOT_SET;
183 size_t authority_end = NOT_SET;
184 size_t path_begin = NOT_SET;
185 size_t path_end = NOT_SET;
Craig Tillerf29a3882015-09-11 12:59:27 -0700186 size_t query_begin = NOT_SET;
187 size_t query_end = NOT_SET;
188 size_t fragment_begin = NOT_SET;
189 size_t fragment_end = NOT_SET;
Craig Tiller3121fd42015-09-10 09:56:20 -0700190 size_t i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700191
Craig Tillera82950e2015-09-22 12:33:20 -0700192 for (i = scheme_begin; uri_text[i] != 0; i++) {
193 if (uri_text[i] == ':') {
194 scheme_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700195 break;
196 }
Craig Tillera82950e2015-09-22 12:33:20 -0700197 if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
198 if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
199 if (i != scheme_begin) {
200 if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
201 if (uri_text[i] == '+') continue;
202 if (uri_text[i] == '-') continue;
203 if (uri_text[i] == '.') continue;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700204 }
Craig Tillera82950e2015-09-22 12:33:20 -0700205 break;
206 }
207 if (scheme_end == NOT_SET) {
208 return bad_uri(uri_text, i, "scheme", suppress_errors);
209 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700210
Craig Tillera82950e2015-09-22 12:33:20 -0700211 if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
212 authority_begin = scheme_end + 3;
213 for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
214 i++) {
215 if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
216 authority_end = i;
217 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700218 }
Craig Tillera82950e2015-09-22 12:33:20 -0700219 if (authority_end == NOT_SET && uri_text[i] == 0) {
220 authority_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700221 }
Craig Tillera82950e2015-09-22 12:33:20 -0700222 if (authority_end == NOT_SET) {
223 return bad_uri(uri_text, i, "authority", suppress_errors);
224 }
225 /* TODO(ctiller): parse the authority correctly */
226 path_begin = authority_end;
227 } else {
228 path_begin = scheme_end + 1;
229 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700230
Craig Tillera82950e2015-09-22 12:33:20 -0700231 for (i = path_begin; uri_text[i] != 0; i++) {
232 if (uri_text[i] == '?' || uri_text[i] == '#') {
David Garcia Quintas52678912015-09-07 11:28:58 -0700233 path_end = i;
Craig Tillera82950e2015-09-22 12:33:20 -0700234 break;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700235 }
Craig Tillera82950e2015-09-22 12:33:20 -0700236 }
237 if (path_end == NOT_SET && uri_text[i] == 0) {
238 path_end = i;
239 }
240 if (path_end == NOT_SET) {
241 return bad_uri(uri_text, i, "path", suppress_errors);
242 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700243
Craig Tillera82950e2015-09-22 12:33:20 -0700244 if (uri_text[i] == '?') {
245 query_begin = ++i;
246 if (!parse_fragment_or_query(uri_text, &i)) {
247 return bad_uri(uri_text, i, "query", suppress_errors);
248 } else if (uri_text[i] != 0 && uri_text[i] != '#') {
249 /* We must be at the end or at the beginning of a fragment */
250 return bad_uri(uri_text, i, "query", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700251 }
Craig Tillera82950e2015-09-22 12:33:20 -0700252 query_end = i;
253 }
254 if (uri_text[i] == '#') {
255 fragment_begin = ++i;
256 if (!parse_fragment_or_query(uri_text, &i)) {
257 return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
258 } else if (uri_text[i] != 0) {
259 /* We must be at the end */
260 return bad_uri(uri_text, i, "fragment", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700261 }
Craig Tillera82950e2015-09-22 12:33:20 -0700262 fragment_end = i;
263 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700264
Craig Tiller6f417882017-02-16 14:09:39 -0800265 uri = gpr_zalloc(sizeof(*uri));
Craig Tillera82950e2015-09-22 12:33:20 -0700266 uri->scheme = copy_component(uri_text, scheme_begin, scheme_end);
267 uri->authority = copy_component(uri_text, authority_begin, authority_end);
268 uri->path = copy_component(uri_text, path_begin, path_end);
269 uri->query = copy_component(uri_text, query_begin, query_end);
270 uri->fragment = copy_component(uri_text, fragment_begin, fragment_end);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700271 parse_query_parts(uri);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700272
273 return uri;
274}
275
David Garcia Quintas057054f2016-03-31 09:20:42 -0700276const char *grpc_uri_get_query_arg(const grpc_uri *uri, const char *key) {
277 GPR_ASSERT(key != NULL);
278 if (key[0] == '\0') return NULL;
279
280 for (size_t i = 0; i < uri->num_query_parts; ++i) {
281 if (0 == strcmp(key, uri->query_parts[i])) {
282 return uri->query_parts_values[i];
283 }
284 }
285 return NULL;
286}
287
Craig Tillera82950e2015-09-22 12:33:20 -0700288void grpc_uri_destroy(grpc_uri *uri) {
289 if (!uri) return;
290 gpr_free(uri->scheme);
291 gpr_free(uri->authority);
292 gpr_free(uri->path);
293 gpr_free(uri->query);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700294 for (size_t i = 0; i < uri->num_query_parts; ++i) {
295 gpr_free(uri->query_parts[i]);
296 gpr_free(uri->query_parts_values[i]);
297 }
298 gpr_free(uri->query_parts);
299 gpr_free(uri->query_parts_values);
Craig Tillera82950e2015-09-22 12:33:20 -0700300 gpr_free(uri->fragment);
301 gpr_free(uri);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700302}