blob: b4ee763735e6ed9a8a1211efa2c5f75eab07c9ea [file] [log] [blame]
/*
 *
 * Copyright 2015, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
33
Craig Tiller9533d042016-03-25 17:11:06 -070034#include "src/core/lib/client_config/uri_parser.h"
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070035
36#include <string.h>
37
38#include <grpc/support/alloc.h>
39#include <grpc/support/log.h>
yang-gb063c872015-10-07 11:40:13 -070040#include <grpc/support/port_platform.h>
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070041#include <grpc/support/string_util.h>
42
/** Sentinel meaning "no offset recorded yet": the largest size_t value, i.e.
 * every bit set. */
#define NOT_SET ((size_t)-1)
45
Craig Tillera82950e2015-09-22 12:33:20 -070046static grpc_uri *bad_uri(const char *uri_text, size_t pos, const char *section,
47 int suppress_errors) {
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070048 char *line_prefix;
Craig Tiller3121fd42015-09-10 09:56:20 -070049 size_t pfx_len;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070050
Craig Tillera82950e2015-09-22 12:33:20 -070051 if (!suppress_errors) {
52 gpr_asprintf(&line_prefix, "bad uri.%s: '", section);
53 pfx_len = strlen(line_prefix) + pos;
54 gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text);
55 gpr_free(line_prefix);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070056
Craig Tillera82950e2015-09-22 12:33:20 -070057 line_prefix = gpr_malloc(pfx_len + 1);
58 memset(line_prefix, ' ', pfx_len);
59 line_prefix[pfx_len] = 0;
60 gpr_log(GPR_ERROR, "%s^ here", line_prefix);
61 gpr_free(line_prefix);
62 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -070063
64 return NULL;
65}
66
/** Returns a newly allocated, NUL-terminated copy of \a src[begin, end).
 *
 * The buffer comes from gpr_malloc and is owned by the caller. An empty range
 * (\a begin == \a end) yields an empty string without reading from or
 * offsetting \a src: grpc_uri_parse passes NOT_SET for both bounds of a
 * component that is absent, and forming \a src + NOT_SET would be an
 * out-of-range pointer even when zero bytes are copied. */
static char *copy_component(const char *src, size_t begin, size_t end) {
  const size_t len = end - begin;
  char *out = gpr_malloc(len + 1);
  if (len != 0) {
    memcpy(out, src + begin, len);
  }
  out[len] = 0;
  return out;
}
74
David Garcia Quintas52678912015-09-07 11:28:58 -070075/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar
76 * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent
Craig Tillerf29a3882015-09-11 12:59:27 -070077 * sign not followed by two hex digits), NOT_SET is returned. */
Craig Tillera82950e2015-09-22 12:33:20 -070078static size_t parse_pchar(const char *uri_text, size_t i) {
David Garcia Quintas52678912015-09-07 11:28:58 -070079 /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
80 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
81 * pct-encoded = "%" HEXDIG HEXDIG
82 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
Craig Tiller45724b32015-09-22 10:42:19 -070083 / "*" / "+" / "," / ";" / "=" */
David Garcia Quintas52678912015-09-07 11:28:58 -070084 char c = uri_text[i];
Craig Tillera82950e2015-09-22 12:33:20 -070085 if (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')) ||
86 ((c >= '0') && (c <= '9')) ||
87 (c == '-' || c == '.' || c == '_' || c == '~') || /* unreserved */
88 (c == '!' || c == '$' || c == '&' || c == '\'' || c == '$' || c == '&' ||
89 c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' ||
90 c == '=') /* sub-delims */) {
91 return 1;
92 }
93 if (c == '%') { /* pct-encoded */
94 size_t j;
95 if (uri_text[i + 1] == 0 || uri_text[i + 2] == 0) {
96 return NOT_SET;
David Garcia Quintas52678912015-09-07 11:28:58 -070097 }
Craig Tillera82950e2015-09-22 12:33:20 -070098 for (j = i + 1; j < 2; j++) {
99 c = uri_text[j];
100 if (!(((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
101 ((c >= 'A') && (c <= 'F')))) {
102 return NOT_SET;
103 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700104 }
Craig Tillera82950e2015-09-22 12:33:20 -0700105 return 2;
106 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700107 return 0;
108}
109
110/* *( pchar / "?" / "/" ) */
Craig Tillera82950e2015-09-22 12:33:20 -0700111static int parse_fragment_or_query(const char *uri_text, size_t *i) {
David Garcia Quintas52678912015-09-07 11:28:58 -0700112 char c;
Craig Tillera82950e2015-09-22 12:33:20 -0700113 while ((c = uri_text[*i]) != 0) {
114 const size_t advance = parse_pchar(uri_text, *i); /* pchar */
115 switch (advance) {
116 case 0: /* uri_text[i] isn't in pchar */
117 /* maybe it's ? or / */
118 if (uri_text[*i] == '?' || uri_text[*i] == '/') {
119 (*i)++;
120 break;
121 } else {
122 return 1;
123 }
yang-gb063c872015-10-07 11:40:13 -0700124 GPR_UNREACHABLE_CODE(return 0);
Craig Tillera82950e2015-09-22 12:33:20 -0700125 default:
126 (*i) += advance;
127 break;
128 case NOT_SET: /* uri_text[i] introduces an invalid URI */
129 return 0;
David Garcia Quintas52678912015-09-07 11:28:58 -0700130 }
Craig Tillera82950e2015-09-22 12:33:20 -0700131 }
Craig Tillerf29a3882015-09-11 12:59:27 -0700132 /* *i is the first uri_text position past the \a query production, maybe \0 */
133 return 1;
David Garcia Quintas52678912015-09-07 11:28:58 -0700134}
135
Craig Tillera82950e2015-09-22 12:33:20 -0700136grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) {
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700137 grpc_uri *uri;
Craig Tiller3121fd42015-09-10 09:56:20 -0700138 size_t scheme_begin = 0;
Craig Tiller3121fd42015-09-10 09:56:20 -0700139 size_t scheme_end = NOT_SET;
140 size_t authority_begin = NOT_SET;
141 size_t authority_end = NOT_SET;
142 size_t path_begin = NOT_SET;
143 size_t path_end = NOT_SET;
Craig Tillerf29a3882015-09-11 12:59:27 -0700144 size_t query_begin = NOT_SET;
145 size_t query_end = NOT_SET;
146 size_t fragment_begin = NOT_SET;
147 size_t fragment_end = NOT_SET;
Craig Tiller3121fd42015-09-10 09:56:20 -0700148 size_t i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700149
Craig Tillera82950e2015-09-22 12:33:20 -0700150 for (i = scheme_begin; uri_text[i] != 0; i++) {
151 if (uri_text[i] == ':') {
152 scheme_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700153 break;
154 }
Craig Tillera82950e2015-09-22 12:33:20 -0700155 if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue;
156 if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue;
157 if (i != scheme_begin) {
158 if (uri_text[i] >= '0' && uri_text[i] <= '9') continue;
159 if (uri_text[i] == '+') continue;
160 if (uri_text[i] == '-') continue;
161 if (uri_text[i] == '.') continue;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700162 }
Craig Tillera82950e2015-09-22 12:33:20 -0700163 break;
164 }
165 if (scheme_end == NOT_SET) {
166 return bad_uri(uri_text, i, "scheme", suppress_errors);
167 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700168
Craig Tillera82950e2015-09-22 12:33:20 -0700169 if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') {
170 authority_begin = scheme_end + 3;
171 for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET;
172 i++) {
173 if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') {
174 authority_end = i;
175 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700176 }
Craig Tillera82950e2015-09-22 12:33:20 -0700177 if (authority_end == NOT_SET && uri_text[i] == 0) {
178 authority_end = i;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700179 }
Craig Tillera82950e2015-09-22 12:33:20 -0700180 if (authority_end == NOT_SET) {
181 return bad_uri(uri_text, i, "authority", suppress_errors);
182 }
183 /* TODO(ctiller): parse the authority correctly */
184 path_begin = authority_end;
185 } else {
186 path_begin = scheme_end + 1;
187 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700188
Craig Tillera82950e2015-09-22 12:33:20 -0700189 for (i = path_begin; uri_text[i] != 0; i++) {
190 if (uri_text[i] == '?' || uri_text[i] == '#') {
David Garcia Quintas52678912015-09-07 11:28:58 -0700191 path_end = i;
Craig Tillera82950e2015-09-22 12:33:20 -0700192 break;
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700193 }
Craig Tillera82950e2015-09-22 12:33:20 -0700194 }
195 if (path_end == NOT_SET && uri_text[i] == 0) {
196 path_end = i;
197 }
198 if (path_end == NOT_SET) {
199 return bad_uri(uri_text, i, "path", suppress_errors);
200 }
David Garcia Quintas52678912015-09-07 11:28:58 -0700201
Craig Tillera82950e2015-09-22 12:33:20 -0700202 if (uri_text[i] == '?') {
203 query_begin = ++i;
204 if (!parse_fragment_or_query(uri_text, &i)) {
205 return bad_uri(uri_text, i, "query", suppress_errors);
206 } else if (uri_text[i] != 0 && uri_text[i] != '#') {
207 /* We must be at the end or at the beginning of a fragment */
208 return bad_uri(uri_text, i, "query", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700209 }
Craig Tillera82950e2015-09-22 12:33:20 -0700210 query_end = i;
211 }
212 if (uri_text[i] == '#') {
213 fragment_begin = ++i;
214 if (!parse_fragment_or_query(uri_text, &i)) {
215 return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors);
216 } else if (uri_text[i] != 0) {
217 /* We must be at the end */
218 return bad_uri(uri_text, i, "fragment", suppress_errors);
David Garcia Quintas52678912015-09-07 11:28:58 -0700219 }
Craig Tillera82950e2015-09-22 12:33:20 -0700220 fragment_end = i;
221 }
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700222
Craig Tillera82950e2015-09-22 12:33:20 -0700223 uri = gpr_malloc(sizeof(*uri));
224 memset(uri, 0, sizeof(*uri));
225 uri->scheme = copy_component(uri_text, scheme_begin, scheme_end);
226 uri->authority = copy_component(uri_text, authority_begin, authority_end);
227 uri->path = copy_component(uri_text, path_begin, path_end);
228 uri->query = copy_component(uri_text, query_begin, query_end);
229 uri->fragment = copy_component(uri_text, fragment_begin, fragment_end);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700230
231 return uri;
232}
233
Craig Tillera82950e2015-09-22 12:33:20 -0700234void grpc_uri_destroy(grpc_uri *uri) {
235 if (!uri) return;
236 gpr_free(uri->scheme);
237 gpr_free(uri->authority);
238 gpr_free(uri->path);
239 gpr_free(uri->query);
240 gpr_free(uri->fragment);
241 gpr_free(uri);
Craig Tiller3bc8ebd2015-06-24 15:41:15 -0700242}