/*
 *
 * Copyright 2015 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
18
Craig Tiller9eb0fde2017-03-31 16:59:30 -070019#include "src/core/ext/filters/client_channel/uri_parser.h"
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070020
21#include <string.h>
22
Craig Tiller0f310802016-10-26 16:25:56 -070023#include <grpc/slice_buffer.h>
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070024#include <grpc/support/alloc.h>
25#include <grpc/support/log.h>
yang-gb063c872015-10-07 11:40:13 -070026#include <grpc/support/port_platform.h>
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070027#include <grpc/support/string_util.h>
28
Yuchen Zengaa76d3d2017-02-15 14:00:01 -080029#include "src/core/lib/slice/percent_encoding.h"
Yuchen Zengc40d1d82017-02-15 20:42:06 -080030#include "src/core/lib/slice/slice_internal.h"
31#include "src/core/lib/slice/slice_string_helpers.h"
David Garcia Quintase3a49f82016-03-30 17:19:29 -070032#include "src/core/lib/support/string.h"
33
/** Sentinel for "this size_t offset has not been assigned yet": the largest
 * size_t value, i.e. every bit set. */
#define NOT_SET ((size_t)-1)
36
Craig Tillerbaa14a92017-11-03 09:09:36 -070037static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section,
David Garcia Quintas53af23c2017-04-15 10:29:46 -070038 bool suppress_errors) {
Craig Tillerbaa14a92017-11-03 09:09:36 -070039 char* line_prefix;
Craig Tiller3121fd42015-09-10 09:56:20 -070040 size_t pfx_len;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070041
Craig Tillera82950e2015-09-22 12:33:20 -070042 if (!suppress_errors) {
43 gpr_asprintf(&line_prefix, "bad uri.%s: '", section);
44 pfx_len = strlen(line_prefix) + pos;
45 gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text);
46 gpr_free(line_prefix);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070047
Craig Tillerbaa14a92017-11-03 09:09:36 -070048 line_prefix = (char*)gpr_malloc(pfx_len + 1);
Craig Tillera82950e2015-09-22 12:33:20 -070049 memset(line_prefix, ' ', pfx_len);
50 line_prefix[pfx_len] = 0;
51 gpr_log(GPR_ERROR, "%s^ here", line_prefix);
52 gpr_free(line_prefix);
53 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070054
Craig Tiller4782d922017-11-10 09:53:21 -080055 return nullptr;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070056}
57
Yuchen Zengaa76d3d2017-02-15 14:00:01 -080058/** Returns a copy of percent decoded \a src[begin, end) */
Craig Tillerbaa14a92017-11-03 09:09:36 -070059static char* decode_and_copy_component(grpc_exec_ctx* exec_ctx, const char* src,
Yuchen Zengc40d1d82017-02-15 20:42:06 -080060 size_t begin, size_t end) {
Yuchen Zengaa76d3d2017-02-15 14:00:01 -080061 grpc_slice component =
ncteisen5dd0d6f2017-11-07 17:18:55 -080062 (begin == NOT_SET || end == NOT_SET)
63 ? grpc_empty_slice()
64 : grpc_slice_from_copied_buffer(src + begin, end - begin);
Yuchen Zengaa76d3d2017-02-15 14:00:01 -080065 grpc_slice decoded_component =
66 grpc_permissive_percent_decode_slice(component);
Craig Tillerbaa14a92017-11-03 09:09:36 -070067 char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII);
Yuchen Zengc40d1d82017-02-15 20:42:06 -080068 grpc_slice_unref_internal(exec_ctx, component);
69 grpc_slice_unref_internal(exec_ctx, decoded_component);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070070 return out;
71}
72
/** Returns true iff \a c is an ASCII hexadecimal digit (0-9, a-f, A-F). */
static bool valid_hex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
77
David Garcia Quintas52678912015-09-07 11:28:58 -070078/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
79 * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
Craig Tillerf29a3882015-09-11 12:59:27 -070080 * sign not followed by two hex digits), NOT_SET is returned. */
Craig Tillerbaa14a92017-11-03 09:09:36 -070081static size_t parse_pchar(const char* uri_text, size_t i) {
David Garcia Quintas52678912015-09-07 11:28:58 -070082 /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
83 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
84 * pct-encoded = "%" HEXDIG HEXDIG
85 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
Craig Tiller45724b32015-09-22 10:42:19 -070086 / "*" / "+" / "," / ";" / "=" */
David Garcia Quintas52678912015-09-07 11:28:58 -070087 char c = uri_text[i];
Igor Pylypiv282c1392017-04-04 19:44:06 -070088 switch (c) {
89 default:
90 if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) ||
91 ((c >= '0') && (c <= '9'))) {
92 return 1;
Craig Tillera82950e2015-09-22 12:33:20 -070093 }
Igor Pylypiv282c1392017-04-04 19:44:06 -070094 break;
95 case ':':
96 case '@':
97 case '-':
98 case '.':
99 case '_':
100 case '~':
101 case '!':
102 case '$':
103 case '&':
104 case '\'':
105 case '(':
106 case ')':
107 case '*':
108 case '+':
109 case ',':
110 case ';':
111 case '=':
112 return 1;
113 case '%': /* pct-encoded */
114 if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) {
115 return 2;
116 }
117 return NOT_SET;
Craig Tillera82950e2015-09-22 12:33:20 -0700118 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700119 return 0;
120}
121
122/* *( pchar / "?" / "/" ) */
Craig Tillerbaa14a92017-11-03 09:09:36 -0700123static int parse_fragment_or_query(const char* uri_text, size_t* i) {
David Garcia Quintas52678912015-09-07 11:28:58 -0700124 char c;
Craig Tillera82950e2015-09-22 12:33:20 -0700125 while ((c = uri_text[*i]) != 0) {
126 const size_t advance = parse_pchar(uri_text, *i); /* pchar */
127 switch (advance) {
128 case 0: /* uri_text[i] isn't in pchar */
129 /* maybe it's ? or / */
130 if (uri_text[*i] == '?' || uri_text[*i] == '/') {
131 (*i)++;
132 break;
133 } else {
134 return 1;
135 }
yang-gb063c872015-10-07 11:40:13 -0700136 GPR_UNREACHABLE_CODE(return 0);
Craig Tillera82950e2015-09-22 12:33:20 -0700137 default:
138 (*i) += advance;
139 break;
140 case NOT_SET: /* uri_text[i] introduces an invalid URI */
141 return 0;
David Garcia Quintas52678912015-09-07 11:28:58 -0700142 }
Craig Tillera82950e2015-09-22 12:33:20 -0700143 }
Craig Tillerf29a3882015-09-11 12:59:27 -0700144 /* *i is the first uri_text position past the \a query production, maybe \0 */
145 return 1;
David Garcia Quintas52678912015-09-07 11:28:58 -0700146}
147
Craig Tillerbaa14a92017-11-03 09:09:36 -0700148static void parse_query_parts(grpc_uri* uri) {
149 static const char* QUERY_PARTS_SEPARATOR = "&";
150 static const char* QUERY_PARTS_VALUE_SEPARATOR = "=";
Craig Tiller4782d922017-11-10 09:53:21 -0800151 GPR_ASSERT(uri->query != nullptr);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700152 if (uri->query[0] == '\0') {
Craig Tiller4782d922017-11-10 09:53:21 -0800153 uri->query_parts = nullptr;
154 uri->query_parts_values = nullptr;
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700155 uri->num_query_parts = 0;
156 return;
157 }
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700158
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800159 gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts,
160 &uri->num_query_parts);
Yash Tibrewalca3c1c02017-09-07 22:47:16 -0700161 uri->query_parts_values =
Craig Tillerbaa14a92017-11-03 09:09:36 -0700162 (char**)gpr_malloc(uri->num_query_parts * sizeof(char**));
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800163 for (size_t i = 0; i < uri->num_query_parts; i++) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700164 char** query_param_parts;
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800165 size_t num_query_param_parts;
Craig Tillerbaa14a92017-11-03 09:09:36 -0700166 char* full = uri->query_parts[i];
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800167 gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts,
168 &num_query_param_parts);
169 GPR_ASSERT(num_query_param_parts > 0);
170 uri->query_parts[i] = query_param_parts[0];
171 if (num_query_param_parts > 1) {
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700172 /* TODO(dgq): only the first value after the separator is considered.
173 * Perhaps all chars after the first separator for the query part should
174 * be included, even if they include the separator. */
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800175 uri->query_parts_values[i] = query_param_parts[1];
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700176 } else {
Craig Tiller4782d922017-11-10 09:53:21 -0800177 uri->query_parts_values[i] = nullptr;
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700178 }
Craig Tiller87a7e1f2016-11-09 09:42:19 -0800179 for (size_t j = 2; j < num_query_param_parts; j++) {
180 gpr_free(query_param_parts[j]);
181 }
182 gpr_free(query_param_parts);
183 gpr_free(full);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700184 }
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700185}
186
Craig Tillerbaa14a92017-11-03 09:09:36 -0700187grpc_uri* grpc_uri_parse(grpc_exec_ctx* exec_ctx, const char* uri_text,
David Garcia Quintas53af23c2017-04-15 10:29:46 -0700188 bool suppress_errors) {
Craig Tillerbaa14a92017-11-03 09:09:36 -0700189 grpc_uri* uri;
Craig Tiller3121fd42015-09-10 09:56:20 -0700190 size_t scheme_begin = 0;
Craig Tiller3121fd42015-09-10 09:56:20 -0700191 size_t scheme_end = NOT_SET;
192 size_t authority_begin = NOT_SET;
193 size_t authority_end = NOT_SET;
194 size_t path_begin = NOT_SET;
195 size_t path_end = NOT_SET;
Craig Tillerf29a3882015-09-11 12:59:27 -0700196 size_t query_begin = NOT_SET;
197 size_t query_end = NOT_SET;
198 size_t fragment_begin = NOT_SET;
199 size_t fragment_end = NOT_SET;
Craig Tiller3121fd42015-09-10 09:56:20 -0700200 size_t i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700201
Craig Tillera82950e2015-09-22 12:33:20 -0700202 for (i = scheme_begin; uri_text[i] != 0; i++) {
203 if (uri_text[i] == ':') {
204 scheme_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700205 break;
206 }
Craig Tillera82950e2015-09-22 12:33:20 -0700207 if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
208 if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
209 if (i != scheme_begin) {
210 if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
211 if (uri_text[i] == '+') continue;
212 if (uri_text[i] == '-') continue;
213 if (uri_text[i] == '.') continue;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700214 }
Craig Tillera82950e2015-09-22 12:33:20 -0700215 break;
216 }
217 if (scheme_end == NOT_SET) {
218 return bad_uri(uri_text, i, "scheme", suppress_errors);
219 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700220
Craig Tillera82950e2015-09-22 12:33:20 -0700221 if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
222 authority_begin = scheme_end + 3;
223 for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
224 i++) {
225 if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
226 authority_end = i;
227 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700228 }
Craig Tillera82950e2015-09-22 12:33:20 -0700229 if (authority_end == NOT_SET && uri_text[i] == 0) {
230 authority_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700231 }
Craig Tillera82950e2015-09-22 12:33:20 -0700232 if (authority_end == NOT_SET) {
233 return bad_uri(uri_text, i, "authority", suppress_errors);
234 }
235 /* TODO(ctiller): parse the authority correctly */
236 path_begin = authority_end;
237 } else {
238 path_begin = scheme_end + 1;
239 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700240
Craig Tillera82950e2015-09-22 12:33:20 -0700241 for (i = path_begin; uri_text[i] != 0; i++) {
242 if (uri_text[i] == '?' || uri_text[i] == '#') {
David Garcia Quintas52678912015-09-07 11:28:58 -0700243 path_end = i;
Craig Tillera82950e2015-09-22 12:33:20 -0700244 break;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700245 }
Craig Tillera82950e2015-09-22 12:33:20 -0700246 }
247 if (path_end == NOT_SET && uri_text[i] == 0) {
248 path_end = i;
249 }
250 if (path_end == NOT_SET) {
251 return bad_uri(uri_text, i, "path", suppress_errors);
252 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700253
Craig Tillera82950e2015-09-22 12:33:20 -0700254 if (uri_text[i] == '?') {
255 query_begin = ++i;
256 if (!parse_fragment_or_query(uri_text, &i)) {
257 return bad_uri(uri_text, i, "query", suppress_errors);
258 } else if (uri_text[i] != 0 && uri_text[i] != '#') {
259 /* We must be at the end or at the beginning of a fragment */
260 return bad_uri(uri_text, i, "query", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700261 }
Craig Tillera82950e2015-09-22 12:33:20 -0700262 query_end = i;
263 }
264 if (uri_text[i] == '#') {
265 fragment_begin = ++i;
266 if (!parse_fragment_or_query(uri_text, &i)) {
267 return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
268 } else if (uri_text[i] != 0) {
269 /* We must be at the end */
270 return bad_uri(uri_text, i, "fragment", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700271 }
Craig Tillera82950e2015-09-22 12:33:20 -0700272 fragment_end = i;
273 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700274
Craig Tillerbaa14a92017-11-03 09:09:36 -0700275 uri = (grpc_uri*)gpr_zalloc(sizeof(*uri));
Yuchen Zengc40d1d82017-02-15 20:42:06 -0800276 uri->scheme =
277 decode_and_copy_component(exec_ctx, uri_text, scheme_begin, scheme_end);
278 uri->authority = decode_and_copy_component(exec_ctx, uri_text,
279 authority_begin, authority_end);
280 uri->path =
281 decode_and_copy_component(exec_ctx, uri_text, path_begin, path_end);
282 uri->query =
283 decode_and_copy_component(exec_ctx, uri_text, query_begin, query_end);
284 uri->fragment = decode_and_copy_component(exec_ctx, uri_text, fragment_begin,
285 fragment_end);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700286 parse_query_parts(uri);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700287
288 return uri;
289}
290
Craig Tillerbaa14a92017-11-03 09:09:36 -0700291const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) {
Craig Tiller4782d922017-11-10 09:53:21 -0800292 GPR_ASSERT(key != nullptr);
293 if (key[0] == '\0') return nullptr;
David Garcia Quintas057054f2016-03-31 09:20:42 -0700294
295 for (size_t i = 0; i < uri->num_query_parts; ++i) {
296 if (0 == strcmp(key, uri->query_parts[i])) {
297 return uri->query_parts_values[i];
298 }
299 }
Craig Tiller4782d922017-11-10 09:53:21 -0800300 return nullptr;
David Garcia Quintas057054f2016-03-31 09:20:42 -0700301}
302
Craig Tillerbaa14a92017-11-03 09:09:36 -0700303void grpc_uri_destroy(grpc_uri* uri) {
Craig Tillera82950e2015-09-22 12:33:20 -0700304 if (!uri) return;
305 gpr_free(uri->scheme);
306 gpr_free(uri->authority);
307 gpr_free(uri->path);
308 gpr_free(uri->query);
David Garcia Quintase3a49f82016-03-30 17:19:29 -0700309 for (size_t i = 0; i < uri->num_query_parts; ++i) {
310 gpr_free(uri->query_parts[i]);
311 gpr_free(uri->query_parts_values[i]);
312 }
313 gpr_free(uri->query_parts);
314 gpr_free(uri->query_parts_values);
Craig Tillera82950e2015-09-22 12:33:20 -0700315 gpr_free(uri->fragment);
316 gpr_free(uri);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700317}