/*
* Conditions Of Use
*
* This software was developed by employees of the National Institute of
* Standards and Technology (NIST), an agency of the Federal Government.
* Pursuant to title 15 United States Code Section 105, works of NIST
* employees are not subject to copyright protection in the United States
* and are considered to be in the public domain. As a result, a formal
* license is not needed to use the software.
*
* This software is provided by NIST as a service and is expressly
* provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
* OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
* AND DATA ACCURACY. NIST does not warrant or make any representations
* regarding the use of the software or the results thereof, including but
* not limited to the correctness, accuracy, reliability or usefulness of
* the software.
*
* Permission to use this software is contingent upon your acceptance
* of the terms of this agreement.
*
*/
26package gov.nist.javax.sip.parser;
27import gov.nist.core.HostNameParser;
28import gov.nist.core.HostPort;
29import gov.nist.core.NameValue;
30import gov.nist.core.NameValueList;
31import gov.nist.core.Token;
32import gov.nist.javax.sip.address.GenericURI;
33import gov.nist.javax.sip.address.SipUri;
34import gov.nist.javax.sip.address.TelURLImpl;
35import gov.nist.javax.sip.address.TelephoneNumber;
36import java.text.ParseException;
37
38/**
39 * Parser For SIP and Tel URLs. Other kinds of URL's are handled by the
40 * J2SE 1.4 URL class.
41 * @version 1.2 $Revision: 1.27 $ $Date: 2009/10/22 10:27:39 $
42 *
43 * @author M. Ranganathan <br/>
44 *
45 *
46 */
47public class URLParser extends Parser {
48
49 public URLParser(String url) {
50 this.lexer = new Lexer("sip_urlLexer", url);
51 }
52
53 // public tag added - issued by Miguel Freitas
54 public URLParser(Lexer lexer) {
55 this.lexer = lexer;
56 this.lexer.selectLexer("sip_urlLexer");
57 }
58 protected static boolean isMark(char next) {
59 switch (next) {
60 case '-':
61 case '_':
62 case '.':
63 case '!':
64 case '~':
65 case '*':
66 case '\'':
67 case '(':
68 case ')':
69 return true;
70 default:
71 return false;
72 }
73 }
74
75 protected static boolean isUnreserved(char next) {
76 return Lexer.isAlphaDigit(next) || isMark(next);
77 }
78
79 protected static boolean isReservedNoSlash(char next) {
80 switch (next) {
81 case ';':
82 case '?':
83 case ':':
84 case '@':
85 case '&':
86 case '+':
87 case '$':
88 case ',':
89 return true;
90 default:
91 return false;
92 }
93 }
94
95 // Missing '=' bug in character set - discovered by interop testing
96 // at SIPIT 13 by Bob Johnson and Scott Holben.
97 // change . to ; by Bruno Konik
98 protected static boolean isUserUnreserved(char la) {
99 switch (la) {
100 case '&':
101 case '?':
102 case '+':
103 case '$':
104 case '#':
105 case '/':
106 case ',':
107 case ';':
108 case '=':
109 return true;
110 default:
111 return false;
112 }
113 }
114
115 protected String unreserved() throws ParseException {
116 char next = lexer.lookAhead(0);
117 if (isUnreserved(next)) {
118 lexer.consume(1);
119 return String.valueOf(next);
120 } else
121 throw createParseException("unreserved");
122
123 }
124
125 /** Name or value of a parameter.
126 */
127 protected String paramNameOrValue() throws ParseException {
128 int startIdx = lexer.getPtr();
129 while (lexer.hasMoreChars()) {
130 char next = lexer.lookAhead(0);
131 boolean isValidChar = false;
132 switch (next) {
133 case '[':
134 case ']':// JvB: fixed this one
135 case '/':
136 case ':':
137 case '&':
138 case '+':
139 case '$':
140 isValidChar = true;
141 }
142 if (isValidChar || isUnreserved(next)) {
143 lexer.consume(1);
144 } else if (isEscaped()) {
145 lexer.consume(3);
146 } else
147 break;
148 }
149 return lexer.getBuffer().substring(startIdx, lexer.getPtr());
150 }
151
152 private NameValue uriParam() throws ParseException {
153 if (debug)
154 dbg_enter("uriParam");
155 try {
156 String pvalue = "";
157 String pname = paramNameOrValue();
158 char next = lexer.lookAhead(0);
159 boolean isFlagParam = true;
160 if (next == '=') {
161 lexer.consume(1);
162 pvalue = paramNameOrValue();
163 isFlagParam = false;
164 }
165 if (pname.length() == 0 &&
166 ( pvalue == null ||
167 pvalue.length() == 0))
168 return null;
169 else return new NameValue(pname, pvalue, isFlagParam);
170 } finally {
171 if (debug)
172 dbg_leave("uriParam");
173 }
174 }
175
176 protected static boolean isReserved(char next) {
177 switch (next) {
178 case ';':
179 case '/':
180 case '?':
181 case ':':
182 case '=': // Bug fix by Bruno Konik
183 case '@':
184 case '&':
185 case '+':
186 case '$':
187 case ',':
188 return true;
189 default:
190 return false;
191 }
192 }
193
194 protected String reserved() throws ParseException {
195 char next = lexer.lookAhead(0);
196 if (isReserved(next)) {
197 lexer.consume(1);
198 return new StringBuffer().append(next).toString();
199 } else
200 throw createParseException("reserved");
201 }
202
203 protected boolean isEscaped() {
204 try {
205 return lexer.lookAhead(0) == '%' &&
206 Lexer.isHexDigit(lexer.lookAhead(1)) &&
207 Lexer.isHexDigit(lexer.lookAhead(2));
208 } catch (Exception ex) {
209 return false;
210 }
211 }
212
213 protected String escaped() throws ParseException {
214 if (debug)
215 dbg_enter("escaped");
216 try {
217 StringBuffer retval = new StringBuffer();
218 char next = lexer.lookAhead(0);
219 char next1 = lexer.lookAhead(1);
220 char next2 = lexer.lookAhead(2);
221 if (next == '%'
222 && Lexer.isHexDigit(next1)
223 && Lexer.isHexDigit(next2)) {
224 lexer.consume(3);
225 retval.append(next);
226 retval.append(next1);
227 retval.append(next2);
228 } else
229 throw createParseException("escaped");
230 return retval.toString();
231 } finally {
232 if (debug)
233 dbg_leave("escaped");
234 }
235 }
236
237 protected String mark() throws ParseException {
238 if (debug)
239 dbg_enter("mark");
240 try {
241 char next = lexer.lookAhead(0);
242 if (isMark(next)) {
243 lexer.consume(1);
244 return new String( new char[]{next} );
245 } else
246 throw createParseException("mark");
247 } finally {
248 if (debug)
249 dbg_leave("mark");
250 }
251 }
252
253 protected String uric() {
254 if (debug)
255 dbg_enter("uric");
256 try {
257 try {
258 char la = lexer.lookAhead(0);
259 if (isUnreserved(la)) {
260 lexer.consume(1);
261 return Lexer.charAsString(la);
262 } else if (isReserved(la)) {
263 lexer.consume(1);
264 return Lexer.charAsString(la);
265 } else if (isEscaped()) {
266 String retval = lexer.charAsString(3);
267 lexer.consume(3);
268 return retval;
269 } else
270 return null;
271 } catch (Exception ex) {
272 return null;
273 }
274 } finally {
275 if (debug)
276 dbg_leave("uric");
277 }
278
279 }
280
281 protected String uricNoSlash() {
282 if (debug)
283 dbg_enter("uricNoSlash");
284 try {
285 try {
286 char la = lexer.lookAhead(0);
287 if (isEscaped()) {
288 String retval = lexer.charAsString(3);
289 lexer.consume(3);
290 return retval;
291 } else if (isUnreserved(la)) {
292 lexer.consume(1);
293 return Lexer.charAsString(la);
294 } else if (isReservedNoSlash(la)) {
295 lexer.consume(1);
296 return Lexer.charAsString(la);
297 } else
298 return null;
299 } catch (ParseException ex) {
300 return null;
301 }
302 } finally {
303 if (debug)
304 dbg_leave("uricNoSlash");
305 }
306 }
307
308 protected String uricString() throws ParseException {
309 StringBuffer retval = new StringBuffer();
310 while (true) {
311 String next = uric();
312 if (next == null) {
313 char la = lexer.lookAhead(0);
314 // JvB: allow IPv6 addresses in generic URI strings
315 // e.g. http://[::1]
316 if ( la == '[' ) {
317 HostNameParser hnp = new HostNameParser(this.getLexer());
318 HostPort hp = hnp.hostPort( false );
319 retval.append(hp.toString());
320 continue;
321 }
322 break;
323 }
324 retval.append(next);
325 }
326 return retval.toString();
327 }
328
329 /**
330 * Parse and return a structure for a generic URL.
331 * Note that non SIP URLs are just stored as a string (not parsed).
332 * @return URI is a URL structure for a SIP url.
333 * @throws ParseException if there was a problem parsing.
334 */
335 public GenericURI uriReference( boolean inBrackets ) throws ParseException {
336 if (debug)
337 dbg_enter("uriReference");
338 GenericURI retval = null;
339 Token[] tokens = lexer.peekNextToken(2);
340 Token t1 = (Token) tokens[0];
341 Token t2 = (Token) tokens[1];
342 try {
343
344 if (t1.getTokenType() == TokenTypes.SIP ||
345 t1.getTokenType() == TokenTypes.SIPS) {
346 if (t2.getTokenType() == ':')
347 retval = sipURL( inBrackets );
348 else
349 throw createParseException("Expecting \':\'");
350 } else if (t1.getTokenType() == TokenTypes.TEL) {
351 if (t2.getTokenType() == ':') {
352 retval = telURL( inBrackets );
353 } else
354 throw createParseException("Expecting \':\'");
355 } else {
356 String urlString = uricString();
357 try {
358 retval = new GenericURI(urlString);
359 } catch (ParseException ex) {
360 throw createParseException(ex.getMessage());
361 }
362 }
363 } finally {
364 if (debug)
365 dbg_leave("uriReference");
366 }
367 return retval;
368 }
369
370 /**
371 * Parser for the base phone number.
372 */
373 private String base_phone_number() throws ParseException {
374 StringBuffer s = new StringBuffer();
375
376 if (debug)
377 dbg_enter("base_phone_number");
378 try {
379 int lc = 0;
380 while (lexer.hasMoreChars()) {
381 char w = lexer.lookAhead(0);
382 if (Lexer.isDigit(w)
383 || w == '-'
384 || w == '.'
385 || w == '('
386 || w == ')') {
387 lexer.consume(1);
388 s.append(w);
389 lc++;
390 } else if (lc > 0)
391 break;
392 else
393 throw createParseException("unexpected " + w);
394 }
395 return s.toString();
396 } finally {
397 if (debug)
398 dbg_leave("base_phone_number");
399 }
400
401 }
402
403 /**
404 * Parser for the local phone #.
405 */
406 private String local_number() throws ParseException {
407 StringBuffer s = new StringBuffer();
408 if (debug)
409 dbg_enter("local_number");
410 try {
411 int lc = 0;
412 while (lexer.hasMoreChars()) {
413 char la = lexer.lookAhead(0);
414 if (la == '*'
415 || la == '#'
416 || la == '-'
417 || la == '.'
418 || la == '('
419 || la == ')'
420 // JvB: allow 'A'..'F', should be uppercase
421 || Lexer.isHexDigit(la)) {
422 lexer.consume(1);
423 s.append(la);
424 lc++;
425 } else if (lc > 0)
426 break;
427 else
428 throw createParseException("unexepcted " + la);
429 }
430 return s.toString();
431 } finally {
432 if (debug)
433 dbg_leave("local_number");
434 }
435
436 }
437
438 /**
439 * Parser for telephone subscriber.
440 *
441 * @return the parsed telephone number.
442 */
443 public final TelephoneNumber parseTelephoneNumber( boolean inBrackets )
444 throws ParseException {
445 TelephoneNumber tn;
446
447 if (debug)
448 dbg_enter("telephone_subscriber");
449 lexer.selectLexer("charLexer");
450 try {
451 char c = lexer.lookAhead(0);
452 if (c == '+')
453 tn = global_phone_number( inBrackets );
454 else if (
455 Lexer.isHexDigit(c)// see RFC3966
456 || c == '#'
457 || c == '*'
458 || c == '-'
459 || c == '.'
460 || c == '('
461 || c == ')' ) {
462 tn = local_phone_number( inBrackets );
463 } else
464 throw createParseException("unexpected char " + c);
465 return tn;
466 } finally {
467 if (debug)
468 dbg_leave("telephone_subscriber");
469 }
470
471 }
472
473 private final TelephoneNumber global_phone_number( boolean inBrackets ) throws ParseException {
474 if (debug)
475 dbg_enter("global_phone_number");
476 try {
477 TelephoneNumber tn = new TelephoneNumber();
478 tn.setGlobal(true);
479 NameValueList nv = null;
480 this.lexer.match(PLUS);
481 String b = base_phone_number();
482 tn.setPhoneNumber(b);
483 if (lexer.hasMoreChars()) {
484 char tok = lexer.lookAhead(0);
485 if (tok == ';' && inBrackets) {
486 this.lexer.consume(1);
487 nv = tel_parameters();
488 tn.setParameters(nv);
489 }
490 }
491 return tn;
492 } finally {
493 if (debug)
494 dbg_leave("global_phone_number");
495 }
496 }
497
498 private TelephoneNumber local_phone_number( boolean inBrackets ) throws ParseException {
499 if (debug)
500 dbg_enter("local_phone_number");
501 TelephoneNumber tn = new TelephoneNumber();
502 tn.setGlobal(false);
503 NameValueList nv = null;
504 String b = null;
505 try {
506 b = local_number();
507 tn.setPhoneNumber(b);
508 if (lexer.hasMoreChars()) {
509 Token tok = this.lexer.peekNextToken();
510 switch (tok.getTokenType()) {
511 case SEMICOLON:
512 {
513 if (inBrackets) {
514 this.lexer.consume(1);
515 nv = tel_parameters();
516 tn.setParameters(nv);
517 }
518 break;
519 }
520 default :
521 {
522 break;
523 }
524 }
525 }
526 } finally {
527 if (debug)
528 dbg_leave("local_phone_number");
529 }
530 return tn;
531 }
532
533 private NameValueList tel_parameters() throws ParseException {
534 NameValueList nvList = new NameValueList();
535
536 // JvB: Need to handle 'phone-context' specially
537 // 'isub' (or 'ext') MUST appear first, but we accept any order here
538 NameValue nv;
539 while ( true ) {
540 String pname = paramNameOrValue();
541
542 // Handle 'phone-context' specially, it may start with '+'
543 if ( pname.equalsIgnoreCase("phone-context")) {
544 nv = phone_context();
545 } else {
546 if (lexer.lookAhead(0) == '=') {
547 lexer.consume(1);
548 String value = paramNameOrValue();
549 nv = new NameValue( pname, value, false );
550 } else {
551 nv = new NameValue( pname, "", true );// flag param
552 }
553 }
554 nvList.set( nv );
555
556 if ( lexer.lookAhead(0) == ';' ) {
557 lexer.consume(1);
558 } else {
559 return nvList;
560 }
561 }
562
563 }
564
565 /**
566 * Parses the 'phone-context' parameter in tel: URLs
567 * @throws ParseException
568 */
569 private NameValue phone_context() throws ParseException {
570 lexer.match('=');
571
572 char la = lexer.lookAhead(0);
573 Object value;
574 if (la=='+') {// global-number-digits
575 lexer.consume(1);// skip '+'
576 value = "+" + base_phone_number();
577 } else if ( Lexer.isAlphaDigit(la) ) {
578 Token t = lexer.match( Lexer.ID );// more broad than allowed
579 value = t.getTokenValue();
580 } else {
581 throw new ParseException( "Invalid phone-context:" + la , -1 );
582 }
583 return new NameValue( "phone-context", value, false );
584 }
585
586 /**
587 * Parse and return a structure for a Tel URL.
588 * @return a parsed tel url structure.
589 */
590 public TelURLImpl telURL( boolean inBrackets ) throws ParseException {
591 lexer.match(TokenTypes.TEL);
592 lexer.match(':');
593 TelephoneNumber tn = this.parseTelephoneNumber(inBrackets);
594 TelURLImpl telUrl = new TelURLImpl();
595 telUrl.setTelephoneNumber(tn);
596 return telUrl;
597
598 }
599
600 /**
601 * Parse and return a structure for a SIP URL.
602 * @return a URL structure for a SIP url.
603 * @throws ParseException if there was a problem parsing.
604 */
605 public SipUri sipURL( boolean inBrackets ) throws ParseException {
606 if (debug)
607 dbg_enter("sipURL");
608 SipUri retval = new SipUri();
609 // pmusgrave - handle sips case
610 Token nextToken = lexer.peekNextToken();
611 int sipOrSips = TokenTypes.SIP;
612 String scheme = TokenNames.SIP;
613 if ( nextToken.getTokenType() == TokenTypes.SIPS)
614 {
615 sipOrSips = TokenTypes.SIPS;
616 scheme = TokenNames.SIPS;
617 }
618
619 try {
620 lexer.match(sipOrSips);
621 lexer.match(':');
622 retval.setScheme(scheme);
623 int startOfUser = lexer.markInputPosition();
624 String userOrHost = user();// Note: user may contain ';', host may not...
625 String passOrPort = null;
626
627 // name:password or host:port
628 if ( lexer.lookAhead() == ':' ) {
629 lexer.consume(1);
630 passOrPort = password();
631 }
632
633 // name@hostPort
634 if ( lexer.lookAhead() == '@' ) {
635 lexer.consume(1);
636 retval.setUser( userOrHost );
637 if (passOrPort!=null) retval.setUserPassword( passOrPort );
638 } else {
639 // then userOrHost was a host, backtrack just in case a ';' was eaten...
640 lexer.rewindInputPosition( startOfUser );
641 }
642
643 HostNameParser hnp = new HostNameParser(this.getLexer());
644 HostPort hp = hnp.hostPort( false );
645 retval.setHostPort(hp);
646
647 lexer.selectLexer("charLexer");
648 while (lexer.hasMoreChars()) {
649 // If the URI is not enclosed in brackets, parameters belong to header
650 if (lexer.lookAhead(0) != ';' || !inBrackets)
651 break;
652 lexer.consume(1);
653 NameValue parms = uriParam();
654 if (parms != null) retval.setUriParameter(parms);
655 }
656
657 if (lexer.hasMoreChars() && lexer.lookAhead(0) == '?') {
658 lexer.consume(1);
659 while (lexer.hasMoreChars()) {
660 NameValue parms = qheader();
661 retval.setQHeader(parms);
662 if (lexer.hasMoreChars() && lexer.lookAhead(0) != '&')
663 break;
664 else
665 lexer.consume(1);
666 }
667 }
668 return retval;
Chia-chi Yehb23dbfc2011-11-23 17:27:05 -0800669 // BEGIN android-added
670 } catch (RuntimeException e) {
671 throw new ParseException("Invalid URL: " + lexer.getBuffer(), -1);
672 // END android-added
Chung-yih Wang600c7a42010-02-08 11:26:23 +0800673 } finally {
674 if (debug)
675 dbg_leave("sipURL");
676 }
677 }
678
679 public String peekScheme() throws ParseException {
680 Token[] tokens = lexer.peekNextToken(1);
681 if (tokens.length == 0)
682 return null;
683 String scheme = ((Token) tokens[0]).getTokenValue();
684 return scheme;
685 }
686
687 /**
688 * Get a name value for a given query header (ie one that comes
689 * after the ?).
690 */
691 protected NameValue qheader() throws ParseException {
692 String name = lexer.getNextToken('=');
693 lexer.consume(1);
694 String value = hvalue();
695 return new NameValue(name, value, false);
696
697 }
698
699 protected String hvalue() throws ParseException {
700 StringBuffer retval = new StringBuffer();
701 while (lexer.hasMoreChars()) {
702 char la = lexer.lookAhead(0);
703 // Look for a character that can terminate a URL.
704 boolean isValidChar = false;
705 switch (la) {
706 case '+':
707 case '?':
708 case ':':
709 case '[':
710 case ']':
711 case '/':
712 case '$':
713 case '_':
714 case '-':
715 case '"':
716 case '!':
717 case '~':
718 case '*':
719 case '.':
720 case '(':
721 case ')':
722 isValidChar = true;
723 }
724 if (isValidChar || Lexer.isAlphaDigit(la)) {
725 lexer.consume(1);
726 retval.append(la);
727 } else if (la == '%') {
728 retval.append(escaped());
729 } else
730 break;
731 }
732 return retval.toString();
733 }
734
735 /**
736 * Scan forward until you hit a terminating character for a URL.
737 * We do not handle non sip urls in this implementation.
738 * @return the string that takes us to the end of this URL (i.e. to
739 * the next delimiter).
740 */
741 protected String urlString() throws ParseException {
742 StringBuffer retval = new StringBuffer();
743 lexer.selectLexer("charLexer");
744
745 while (lexer.hasMoreChars()) {
746 char la = lexer.lookAhead(0);
747 // Look for a character that can terminate a URL.
748 if (la == ' '
749 || la == '\t'
750 || la == '\n'
751 || la == '>'
752 || la == '<')
753 break;
754 lexer.consume(0);
755 retval.append(la);
756 }
757 return retval.toString();
758 }
759
760 protected String user() throws ParseException {
761 if (debug)
762 dbg_enter("user");
763 try {
764 int startIdx = lexer.getPtr();
765 while (lexer.hasMoreChars()) {
766 char la = lexer.lookAhead(0);
767 if (isUnreserved(la) || isUserUnreserved(la)) {
768 lexer.consume(1);
769 } else if (isEscaped()) {
770 lexer.consume(3);
771 } else
772 break;
773 }
774 return lexer.getBuffer().substring(startIdx, lexer.getPtr());
775 } finally {
776 if (debug)
777 dbg_leave("user");
778 }
779
780 }
781
782 protected String password() throws ParseException {
783 int startIdx = lexer.getPtr();
784 while (true) {
785 char la = lexer.lookAhead(0);
786 boolean isValidChar = false;
787 switch (la) {
788 case '&':
789 case '=':
790 case '+':
791 case '$':
792 case ',':
793 isValidChar = true;
794 }
795 if (isValidChar || isUnreserved(la)) {
796 lexer.consume(1);
797 } else if (isEscaped()) {
798 lexer.consume(3); // bug reported by
799 // Jeff Haynie
800 } else
801 break;
802
803 }
804 return lexer.getBuffer().substring(startIdx, lexer.getPtr());
805 }
806
807 /**
808 * Default parse method. This method just calls uriReference.
809 */
810 public GenericURI parse() throws ParseException {
811 return uriReference( true );
812 }
813
814 // quick test routine for debugging type assignment
815 public static void main(String[] args) throws ParseException
816 {
817 // quick test for sips parsing
818 String[] test = { "sip:alice@example.com",
819 "sips:alice@examples.com" ,
820 "sip:3Zqkv5dajqaaas0tCjCxT0xH2ZEuEMsFl0xoasip%3A%2B3519116786244%40siplab.domain.com@213.0.115.163:7070"};
821
822 for ( int i = 0; i < test.length; i++)
823 {
824 URLParser p = new URLParser(test[i]);
825
826 GenericURI uri = p.parse();
827 System.out.println("uri type returned " + uri.getClass().getName());
828 System.out.println(test[i] + " is SipUri? " + uri.isSipURI()
829 + ">" + uri.encode());
830 }
831 }
832
833 /**
834
835 **/
836}
837