| /* |
| * Copyright (C) 2007 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.android.util; |
| |
| import android.annotation.UnsupportedAppUsage; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
/**
 * Logic for parsing a text message typed by the user, looking for smileys,
 * urls, acronyms, formatting (e.g., '*'s for bold), me commands
 * (e.g., "/me is asleep"), and punctuation.
 *
 * It constructs a list of parts that breaks the text up into its
 * constituent pieces, which is returned to the client.
 */
| public abstract class AbstractMessageParser { |
| /** |
| * Interface representing the set of resources needed by a message parser |
| * |
| * @author jessan (Jessan Hutchison-Quillian) |
| */ |
| public static interface Resources { |
| |
| /** Get the known set of URL schemes. */ |
| public Set<String> getSchemes(); |
| |
| /** Get the possible values for the last part of a domain name. |
| * Values are expected to be reversed in the Trie. |
| */ |
| public TrieNode getDomainSuffixes(); |
| |
| /** Get the smileys accepted by the parser. */ |
| public TrieNode getSmileys(); |
| |
| /** Get the acronyms accepted by the parser. */ |
| public TrieNode getAcronyms(); |
| } |
| |
/**
 * Supplies the lookup data used while parsing. Subclasses must define the
 * schemes, domain suffixes, smileys and acronyms that are necessary for
 * parsing.
 *
 * @return the resources consulted by the smiley, acronym and URL parsers
 */
protected abstract Resources getResources();
| |
/** Music note that indicates user is listening to a music track. */
public static final String musicNote = "\u266B ";

// The text being parsed; parse() strips a recognized leading "/me" command from it.
private String text;
// Index into text of the next character that has not been consumed yet.
private int nextChar;
// Counter that getCharClass() increments to give each punctuation
// character a class value of its own.
private int nextClass;
// Parts built from the tokens by buildParts().
private ArrayList<Part> parts;
// Tokens produced while scanning the text.
private ArrayList<Token> tokens;
// Start Format tokens that have not yet been matched, keyed by format character.
private HashMap<Character,Format> formatStart;
// Switches selecting which kinds of content the parser looks for.
private boolean parseSmilies;
private boolean parseAcronyms;
private boolean parseFormatting;
private boolean parseUrls;
private boolean parseMeText;
private boolean parseMusic;
| |
/**
 * Create a message parser to parse urls, formatting, acronyms, smileys,
 * /me text and music.
 *
 * @param text the text to parse
 */
public AbstractMessageParser(String text) {
    // Delegate to the full constructor with every parsing switch enabled.
    this(text, true, true, true, true, true, true);
}
| |
/**
 * Creates a message parser, specifying the kinds of text to parse.
 *
 * @param text the text to parse
 * @param parseSmilies whether to look for smileys
 * @param parseAcronyms whether to look for acronyms
 * @param parseFormatting whether to interpret formatting characters
 * @param parseUrls whether to look for URLs
 * @param parseMusic whether to look for a leading music track marker
 * @param parseMeText whether to look for a leading "/me" command
 */
public AbstractMessageParser(String text, boolean parseSmilies,
        boolean parseAcronyms, boolean parseFormatting, boolean parseUrls,
        boolean parseMusic, boolean parseMeText) {
    // Feature switches.
    this.parseSmilies = parseSmilies;
    this.parseAcronyms = parseAcronyms;
    this.parseFormatting = parseFormatting;
    this.parseUrls = parseUrls;
    this.parseMusic = parseMusic;
    this.parseMeText = parseMeText;

    // Input and scanning state.
    this.text = text;
    this.nextChar = 0;
    this.nextClass = 10;

    // Output containers.
    this.tokens = new ArrayList<Token>();
    this.parts = new ArrayList<Part>();
    this.formatStart = new HashMap<Character,Format>();
}
| |
| /** Returns the raw text being parsed. */ |
| public final String getRawText() { return text; } |
| |
| /** Return the number of parts. */ |
| public final int getPartCount() { return parts.size(); } |
| |
| /** Return the part at the given index. */ |
| public final Part getPart(int index) { return parts.get(index); } |
| |
| /** Return the list of parts from the parsed text */ |
| public final List<Part> getParts() { return parts; } |
| |
| /** Parses the text string into an internal representation. */ |
| public void parse() { |
| // Look for music track (of which there would be only one and it'll be the |
| // first token) |
| if (parseMusicTrack()) { |
| buildParts(null); |
| return; |
| } |
| |
| // Look for me commands. |
| String meText = null; |
| if (parseMeText && text.startsWith("/me") && (text.length() > 3) && |
| Character.isWhitespace(text.charAt(3))) { |
| meText = text.substring(0, 4); |
| text = text.substring(4); |
| } |
| |
| // Break the text into tokens. |
| boolean wasSmiley = false; |
| while (nextChar < text.length()) { |
| if (!isWordBreak(nextChar)) { |
| if (!wasSmiley || !isSmileyBreak(nextChar)) { |
| throw new AssertionError("last chunk did not end at word break"); |
| } |
| } |
| |
| if (parseSmiley()) { |
| wasSmiley = true; |
| } else { |
| wasSmiley = false; |
| |
| if (!parseAcronym() && !parseURL() && !parseFormatting()) { |
| parseText(); |
| } |
| } |
| } |
| |
| // Trim the whitespace before and after media components. |
| for (int i = 0; i < tokens.size(); ++i) { |
| if (tokens.get(i).isMedia()) { |
| if ((i > 0) && (tokens.get(i - 1) instanceof Html)) { |
| ((Html)tokens.get(i - 1)).trimLeadingWhitespace(); |
| } |
| if ((i + 1 < tokens.size()) && (tokens.get(i + 1) instanceof Html)) { |
| ((Html)tokens.get(i + 1)).trimTrailingWhitespace(); |
| } |
| } |
| } |
| |
| // Remove any empty html tokens. |
| for (int i = 0; i < tokens.size(); ++i) { |
| if (tokens.get(i).isHtml() && |
| (tokens.get(i).toHtml(true).length() == 0)) { |
| tokens.remove(i); |
| --i; // visit this index again |
| } |
| } |
| |
| buildParts(meText); |
| } |
| |
| /** |
| * Get a the appropriate Token for a given URL |
| * |
| * @param text the anchor text |
| * @param url the url |
| * |
| */ |
| public static Token tokenForUrl(String url, String text) { |
| if(url == null) { |
| return null; |
| } |
| |
| //Look for video links |
| Video video = Video.matchURL(url, text); |
| if (video != null) { |
| return video; |
| } |
| |
| // Look for video links. |
| YouTubeVideo ytVideo = YouTubeVideo.matchURL(url, text); |
| if (ytVideo != null) { |
| return ytVideo; |
| } |
| |
| // Look for photo links. |
| Photo photo = Photo.matchURL(url, text); |
| if (photo != null) { |
| return photo; |
| } |
| |
| // Look for photo links. |
| FlickrPhoto flickrPhoto = FlickrPhoto.matchURL(url, text); |
| if (flickrPhoto != null) { |
| return flickrPhoto; |
| } |
| |
| //Not media, so must be a regular URL |
| return new Link(url, text); |
| } |
| |
| /** |
| * Builds the parts list. |
| * |
| * @param meText any meText parsed from the message |
| */ |
| private void buildParts(String meText) { |
| for (int i = 0; i < tokens.size(); ++i) { |
| Token token = tokens.get(i); |
| if (token.isMedia() || (parts.size() == 0) || lastPart().isMedia()) { |
| parts.add(new Part()); |
| } |
| lastPart().add(token); |
| } |
| |
| // The first part inherits the meText of the line. |
| if (parts.size() > 0) { |
| parts.get(0).setMeText(meText); |
| } |
| } |
| |
| /** Returns the last part in the list. */ |
| private Part lastPart() { return parts.get(parts.size() - 1); } |
| |
| /** |
| * Looks for a music track (\u266B is first character, everything else is |
| * track info). |
| */ |
| private boolean parseMusicTrack() { |
| |
| if (parseMusic && text.startsWith(musicNote)) { |
| addToken(new MusicTrack(text.substring(musicNote.length()))); |
| nextChar = text.length(); |
| return true; |
| } |
| return false; |
| } |
| |
| /** Consumes all of the text in the next word . */ |
| private void parseText() { |
| StringBuilder buf = new StringBuilder(); |
| int start = nextChar; |
| do { |
| char ch = text.charAt(nextChar++); |
| switch (ch) { |
| case '<': buf.append("<"); break; |
| case '>': buf.append(">"); break; |
| case '&': buf.append("&"); break; |
| case '"': buf.append("""); break; |
| case '\'': buf.append("'"); break; |
| case '\n': buf.append("<br>"); break; |
| default: buf.append(ch); break; |
| } |
| } while (!isWordBreak(nextChar)); |
| |
| addToken(new Html(text.substring(start, nextChar), buf.toString())); |
| } |
| |
| /** |
| * Looks for smileys (e.g., ":)") in the text. The set of known smileys is |
| * loaded from a file into a trie at server start. |
| */ |
| private boolean parseSmiley() { |
| if(!parseSmilies) { |
| return false; |
| } |
| TrieNode match = longestMatch(getResources().getSmileys(), this, nextChar, |
| true); |
| if (match == null) { |
| return false; |
| } else { |
| int previousCharClass = getCharClass(nextChar - 1); |
| int nextCharClass = getCharClass(nextChar + match.getText().length()); |
| if ((previousCharClass == 2 || previousCharClass == 3) |
| && (nextCharClass == 2 || nextCharClass == 3)) { |
| return false; |
| } |
| addToken(new Smiley(match.getText())); |
| nextChar += match.getText().length(); |
| return true; |
| } |
| } |
| |
| /** Looks for acronyms (e.g., "lol") in the text. |
| */ |
| private boolean parseAcronym() { |
| if(!parseAcronyms) { |
| return false; |
| } |
| TrieNode match = longestMatch(getResources().getAcronyms(), this, nextChar); |
| if (match == null) { |
| return false; |
| } else { |
| addToken(new Acronym(match.getText(), match.getValue())); |
| nextChar += match.getText().length(); |
| return true; |
| } |
| } |
| |
| /** Determines if this is an allowable domain character. */ |
| private boolean isDomainChar(char c) { |
| return c == '-' || Character.isLetter(c) || Character.isDigit(c); |
| } |
| |
| /** Determines if the given string is a valid domain. */ |
| private boolean isValidDomain(String domain) { |
| // For hostnames, check that it ends with a known domain suffix |
| if (matches(getResources().getDomainSuffixes(), reverse(domain))) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Looks for a URL in two possible forms: either a proper URL with a known |
| * scheme or a domain name optionally followed by a path, query, or query. |
| */ |
| private boolean parseURL() { |
| // Make sure this is a valid place to start a URL. |
| if (!parseUrls || !isURLBreak(nextChar)) { |
| return false; |
| } |
| |
| int start = nextChar; |
| |
| // Search for the first block of letters. |
| int index = start; |
| while ((index < text.length()) && isDomainChar(text.charAt(index))) { |
| index += 1; |
| } |
| |
| String url = ""; |
| boolean done = false; |
| |
| if (index == text.length()) { |
| return false; |
| } else if (text.charAt(index) == ':') { |
| // Make sure this is a known scheme. |
| String scheme = text.substring(nextChar, index); |
| if (!getResources().getSchemes().contains(scheme)) { |
| return false; |
| } |
| } else if (text.charAt(index) == '.') { |
| // Search for the end of the domain name. |
| while (index < text.length()) { |
| char ch = text.charAt(index); |
| if ((ch != '.') && !isDomainChar(ch)) { |
| break; |
| } else { |
| index += 1; |
| } |
| } |
| |
| // Make sure the domain name has a valid suffix. Since tries look for |
| // prefix matches, we reverse all the strings to get suffix comparisons. |
| String domain = text.substring(nextChar, index); |
| if (!isValidDomain(domain)) { |
| return false; |
| } |
| |
| // Search for a port. We deal with this specially because a colon can |
| // also be a punctuation character. |
| if ((index + 1 < text.length()) && (text.charAt(index) == ':')) { |
| char ch = text.charAt(index + 1); |
| if (Character.isDigit(ch)) { |
| index += 1; |
| while ((index < text.length()) && |
| Character.isDigit(text.charAt(index))) { |
| index += 1; |
| } |
| } |
| } |
| |
| // The domain name should be followed by end of line, whitespace, |
| // punctuation, or a colon, slash, question, or hash character. The |
| // tricky part here is that some URL characters are also punctuation, so |
| // we need to distinguish them. Since we looked for ports above, a colon |
| // is always punctuation here. To distinguish '?' cases, we look at the |
| // character that follows it. |
| if (index == text.length()) { |
| done = true; |
| } else { |
| char ch = text.charAt(index); |
| if (ch == '?') { |
| // If the next character is whitespace or punctuation (or missing), |
| // then this question mark looks like punctuation. |
| if (index + 1 == text.length()) { |
| done = true; |
| } else { |
| char ch2 = text.charAt(index + 1); |
| if (Character.isWhitespace(ch2) || isPunctuation(ch2)) { |
| done = true; |
| } |
| } |
| } else if (isPunctuation(ch)) { |
| done = true; |
| } else if (Character.isWhitespace(ch)) { |
| done = true; |
| } else if ((ch == '/') || (ch == '#')) { |
| // In this case, the URL is not done. We will search for the end of |
| // it below. |
| } else { |
| return false; |
| } |
| } |
| |
| // We will assume the user meant HTTP. (One weird case is where they |
| // type a port of 443. That could mean HTTPS, but they might also want |
| // HTTP. We'll let them specify if they don't want HTTP.) |
| url = "http://"; |
| } else { |
| return false; |
| } |
| |
| // If the URL is not done, search for the end, which is just before the |
| // next whitespace character. |
| if (!done) { |
| while ((index < text.length()) && |
| !Character.isWhitespace(text.charAt(index))) { |
| index += 1; |
| } |
| } |
| |
| String urlText = text.substring(start, index); |
| url += urlText; |
| |
| // Figure out the appropriate token type. |
| addURLToken(url, urlText); |
| |
| nextChar = index; |
| return true; |
| } |
| |
| /** |
| * Adds the appropriate token for the given URL. This might be a simple |
| * link or it might be a recognized media type. |
| */ |
| private void addURLToken(String url, String text) { |
| addToken(tokenForUrl(url, text)); |
| } |
| |
| /** |
| * Deal with formatting characters. |
| * |
| * Parsing is as follows: |
| * - Treat all contiguous strings of formatting characters as one block. |
| * (This method processes one block.) |
| * - Only a single instance of a particular format character within a block |
| * is used to determine whether to turn on/off that type of formatting; |
| * other instances simply print the character itself. |
| * - If the format is to be turned on, we use the _first_ instance; if it |
| * is to be turned off, we use the _last_ instance (by appending the |
| * format.) |
| * |
| * Example: |
| * **string** turns into <b>*string*</b> |
| */ |
| private boolean parseFormatting() { |
| if(!parseFormatting) { |
| return false; |
| } |
| int endChar = nextChar; |
| while ((endChar < text.length()) && isFormatChar(text.charAt(endChar))) { |
| endChar += 1; |
| } |
| |
| if ((endChar == nextChar) || !isWordBreak(endChar)) { |
| return false; |
| } |
| |
| // Keeps track of whether we've seen a character (in map if we've seen it) |
| // and whether we should append a closing format token (if value in |
| // map is TRUE). Linked hashmap for consistent ordering. |
| LinkedHashMap<Character, Boolean> seenCharacters = |
| new LinkedHashMap<Character, Boolean>(); |
| |
| for (int index = nextChar; index < endChar; ++index) { |
| char ch = text.charAt(index); |
| Character key = Character.valueOf(ch); |
| if (seenCharacters.containsKey(key)) { |
| // Already seen this character, just append an unmatched token, which |
| // will print plaintext character |
| addToken(new Format(ch, false)); |
| } else { |
| Format start = formatStart.get(key); |
| if (start != null) { |
| // Match the start token, and ask an end token to be appended |
| start.setMatched(true); |
| formatStart.remove(key); |
| seenCharacters.put(key, Boolean.TRUE); |
| } else { |
| // Append start token |
| start = new Format(ch, true); |
| formatStart.put(key, start); |
| addToken(start); |
| seenCharacters.put(key, Boolean.FALSE); |
| } |
| } |
| } |
| |
| // Append any necessary end tokens |
| for (Character key : seenCharacters.keySet()) { |
| if (seenCharacters.get(key) == Boolean.TRUE) { |
| Format end = new Format(key.charValue(), false); |
| end.setMatched(true); |
| addToken(end); |
| } |
| } |
| |
| nextChar = endChar; |
| return true; |
| } |
| |
| /** Determines whether the given index could be a possible word break. */ |
| private boolean isWordBreak(int index) { |
| return getCharClass(index - 1) != getCharClass(index); |
| } |
| |
| /** Determines whether the given index could be a possible smiley break. */ |
| private boolean isSmileyBreak(int index) { |
| if (index > 0 && index < text.length()) { |
| if (isSmileyBreak(text.charAt(index - 1), text.charAt(index))) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /** |
| * Verifies that the character before the given index is end of line, |
| * whitespace, or punctuation. |
| */ |
| private boolean isURLBreak(int index) { |
| switch (getCharClass(index - 1)) { |
| case 2: |
| case 3: |
| case 4: |
| return false; |
| |
| case 0: |
| case 1: |
| default: |
| return true; |
| } |
| } |
| |
| /** Returns the class for the character at the given index. */ |
| private int getCharClass(int index) { |
| if ((index < 0) || (text.length() <= index)) { |
| return 0; |
| } |
| |
| char ch = text.charAt(index); |
| if (Character.isWhitespace(ch)) { |
| return 1; |
| } else if (Character.isLetter(ch)) { |
| return 2; |
| } else if (Character.isDigit(ch)) { |
| return 3; |
| } else if (isPunctuation(ch)) { |
| // For punctuation, we return a unique value every time so that they are |
| // always different from any other character. Punctuation should always |
| // be considered a possible word break. |
| return ++nextClass; |
| } else { |
| return 4; |
| } |
| } |
| |
| /** |
| * Returns true if <code>c1</code> could be the last character of |
| * a smiley and <code>c2</code> could be the first character of |
| * a different smiley, if {@link #isWordBreak} would not already |
| * recognize that this is possible. |
| */ |
| private static boolean isSmileyBreak(char c1, char c2) { |
| switch (c1) { |
| /* |
| * These characters can end smileys, but don't normally end words. |
| */ |
| case '$': case '&': case '*': case '+': case '-': |
| case '/': case '<': case '=': case '>': case '@': |
| case '[': case '\\': case ']': case '^': case '|': |
| case '}': case '~': |
| switch (c2) { |
| /* |
| * These characters can begin smileys, but don't normally |
| * begin words. |
| */ |
| case '#': case '$': case '%': case '*': case '/': |
| case '<': case '=': case '>': case '@': case '[': |
| case '\\': case '^': case '~': |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /** Determines whether the given character is punctuation. */ |
| private static boolean isPunctuation(char ch) { |
| switch (ch) { |
| case '.': case ',': case '"': case ':': case ';': |
| case '?': case '!': case '(': case ')': |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /** |
| * Determines whether the given character is the beginning or end of a |
| * section with special formatting. |
| */ |
| private static boolean isFormatChar(char ch) { |
| switch (ch) { |
| case '*': case '_': case '^': |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /** Represents a unit of parsed output. */ |
| public static abstract class Token { |
| @UnsupportedAppUsage(implicitMember = |
| "values()[Lcom/google/android/util/AbstractMessageParser$Token$Type;") |
| public enum Type { |
| |
| @UnsupportedAppUsage |
| HTML ("html"), |
| @UnsupportedAppUsage |
| FORMAT ("format"), // subtype of HTML |
| @UnsupportedAppUsage |
| LINK ("l"), |
| @UnsupportedAppUsage |
| SMILEY ("e"), |
| @UnsupportedAppUsage |
| ACRONYM ("a"), |
| @UnsupportedAppUsage |
| MUSIC ("m"), |
| @UnsupportedAppUsage |
| GOOGLE_VIDEO ("v"), |
| @UnsupportedAppUsage |
| YOUTUBE_VIDEO ("yt"), |
| @UnsupportedAppUsage |
| PHOTO ("p"), |
| @UnsupportedAppUsage |
| FLICKR ("f"); |
| |
| //stringreps for HTML and FORMAT don't really matter |
| //because they don't define getInfo(), which is where it is used |
| //For the other types, code depends on their stringreps |
| private String stringRep; |
| |
| Type(String stringRep) { |
| this.stringRep = stringRep; |
| } |
| |
| /** {@inheritDoc} */ |
| public String toString() { |
| return this.stringRep; |
| } |
| } |
| |
| protected Type type; |
| protected String text; |
| |
| protected Token(Type type, String text) { |
| this.type = type; |
| this.text = text; |
| } |
| |
| /** Returns the type of the token. */ |
| public Type getType() { return type; } |
| |
| /** |
| * Get the relevant information about a token |
| * |
| * @return a list of strings representing the token, not null |
| * The first item is always a string representation of the type |
| */ |
| public List<String> getInfo() { |
| List<String> info = new ArrayList<String>(); |
| info.add(getType().toString()); |
| return info; |
| } |
| |
| /** Returns the raw text of the token. */ |
| public String getRawText() { return text; } |
| |
| public boolean isMedia() { return false; } |
| public abstract boolean isHtml(); |
| public boolean isArray() { return !isHtml(); } |
| |
| public String toHtml(boolean caps) { throw new AssertionError("not html"); } |
| |
| // The token can change the caps of the text after that point. |
| public boolean controlCaps() { return false; } |
| public boolean setCaps() { return false; } |
| } |
| |
| /** Represents a simple string of html text. */ |
| public static class Html extends Token { |
| private String html; |
| |
| public Html(String text, String html) { |
| super(Type.HTML, text); |
| this.html = html; |
| } |
| |
| public boolean isHtml() { return true; } |
| public String toHtml(boolean caps) { |
| return caps ? html.toUpperCase() : html; |
| } |
| /** |
| * Not supported. Info should not be needed for this type |
| */ |
| public List<String> getInfo() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public void trimLeadingWhitespace() { |
| text = trimLeadingWhitespace(text); |
| html = trimLeadingWhitespace(html); |
| } |
| |
| public void trimTrailingWhitespace() { |
| text = trimTrailingWhitespace(text); |
| html = trimTrailingWhitespace(html); |
| } |
| |
| private static String trimLeadingWhitespace(String text) { |
| int index = 0; |
| while ((index < text.length()) && |
| Character.isWhitespace(text.charAt(index))) { |
| ++index; |
| } |
| return text.substring(index); |
| } |
| |
| public static String trimTrailingWhitespace(String text) { |
| int index = text.length(); |
| while ((index > 0) && Character.isWhitespace(text.charAt(index - 1))) { |
| --index; |
| } |
| return text.substring(0, index); |
| } |
| } |
| |
| /** Represents a music track token at the beginning. */ |
| public static class MusicTrack extends Token { |
| private String track; |
| |
| public MusicTrack(String track) { |
| super(Type.MUSIC, track); |
| this.track = track; |
| } |
| |
| public String getTrack() { return track; } |
| |
| public boolean isHtml() { return false; } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getTrack()); |
| return info; |
| } |
| } |
| |
| /** Represents a link that was found in the input. */ |
| public static class Link extends Token { |
| private String url; |
| |
| public Link(String url, String text) { |
| super(Type.LINK, text); |
| this.url = url; |
| } |
| |
| public String getURL() { return url; } |
| |
| public boolean isHtml() { return false; } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getURL()); |
| info.add(getRawText()); |
| return info; |
| } |
| } |
| |
| /** Represents a link to a Google Video. */ |
| public static class Video extends Token { |
| /** Pattern for a video URL. */ |
| private static final Pattern URL_PATTERN = Pattern.compile( |
| "(?i)http://video\\.google\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/videoplay\\?" |
| + ".*?\\bdocid=(-?\\d+).*"); |
| |
| private String docid; |
| |
| public Video(String docid, String text) { |
| super(Type.GOOGLE_VIDEO, text); |
| this.docid = docid; |
| } |
| |
| public String getDocID() { return docid; } |
| |
| public boolean isHtml() { return false; } |
| public boolean isMedia() { return true; } |
| |
| /** Returns a Video object if the given url is to a video. */ |
| public static Video matchURL(String url, String text) { |
| Matcher m = URL_PATTERN.matcher(url); |
| if (m.matches()) { |
| return new Video(m.group(1), text); |
| } else { |
| return null; |
| } |
| } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getRssUrl(docid)); |
| info.add(getURL(docid)); |
| return info; |
| } |
| |
| /** Returns the URL for the RSS description of the given video. */ |
| public static String getRssUrl(String docid) { |
| return "http://video.google.com/videofeed" |
| + "?type=docid&output=rss&sourceid=gtalk&docid=" + docid; |
| } |
| |
| /** (For testing purposes:) Returns a video URL with the given parts. */ |
| public static String getURL(String docid) { |
| return getURL(docid, null); |
| } |
| |
| /** (For testing purposes:) Returns a video URL with the given parts. */ |
| public static String getURL(String docid, String extraParams) { |
| if (extraParams == null) { |
| extraParams = ""; |
| } else if (extraParams.length() > 0) { |
| extraParams += "&"; |
| } |
| return "http://video.google.com/videoplay?" + extraParams |
| + "docid=" + docid; |
| } |
| } |
| |
| /** Represents a link to a YouTube video. */ |
| public static class YouTubeVideo extends Token { |
| /** Pattern for a video URL. */ |
| private static final Pattern URL_PATTERN = Pattern.compile( |
| "(?i)http://(?:[a-z0-9]+\\.)?youtube\\.[a-z0-9]+(?:\\.[a-z0-9]+)?/watch\\?" |
| + ".*\\bv=([-_a-zA-Z0-9=]+).*"); |
| |
| private String docid; |
| |
| public YouTubeVideo(String docid, String text) { |
| super(Type.YOUTUBE_VIDEO, text); |
| this.docid = docid; |
| } |
| |
| public String getDocID() { return docid; } |
| |
| public boolean isHtml() { return false; } |
| public boolean isMedia() { return true; } |
| |
| /** Returns a Video object if the given url is to a video. */ |
| public static YouTubeVideo matchURL(String url, String text) { |
| Matcher m = URL_PATTERN.matcher(url); |
| if (m.matches()) { |
| return new YouTubeVideo(m.group(1), text); |
| } else { |
| return null; |
| } |
| } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getRssUrl(docid)); |
| info.add(getURL(docid)); |
| return info; |
| } |
| |
| /** Returns the URL for the RSS description of the given video. */ |
| public static String getRssUrl(String docid) { |
| return "http://youtube.com/watch?v=" + docid; |
| } |
| |
| /** (For testing purposes:) Returns a video URL with the given parts. */ |
| public static String getURL(String docid) { |
| return getURL(docid, null); |
| } |
| |
| /** (For testing purposes:) Returns a video URL with the given parts. */ |
| public static String getURL(String docid, String extraParams) { |
| if (extraParams == null) { |
| extraParams = ""; |
| } else if (extraParams.length() > 0) { |
| extraParams += "&"; |
| } |
| return "http://youtube.com/watch?" + extraParams + "v=" + docid; |
| } |
| |
| /** (For testing purposes:) Returns a video URL with the given parts. |
| * @param http If true, includes http:// |
| * @param prefix If non-null/non-blank, adds to URL before youtube.com. |
| * (e.g., prefix="br." --> "br.youtube.com") |
| */ |
| public static String getPrefixedURL(boolean http, String prefix, |
| String docid, String extraParams) { |
| String protocol = ""; |
| |
| if (http) { |
| protocol = "http://"; |
| } |
| |
| if (prefix == null) { |
| prefix = ""; |
| } |
| |
| if (extraParams == null) { |
| extraParams = ""; |
| } else if (extraParams.length() > 0) { |
| extraParams += "&"; |
| } |
| |
| return protocol + prefix + "youtube.com/watch?" + extraParams + "v=" + |
| docid; |
| } |
| } |
| |
| /** Represents a link to a Picasa photo or album. */ |
| public static class Photo extends Token { |
| /** Pattern for an album or photo URL. */ |
| // TODO (katyarogers) searchbrowse includes search lists and tags, |
| // it follows a different pattern than albums - would be nice to add later |
| private static final Pattern URL_PATTERN = Pattern.compile( |
| "http://picasaweb.google.com/([^/?#&]+)/+((?!searchbrowse)[^/?#&]+)(?:/|/photo)?(?:\\?[^#]*)?(?:#(.*))?"); |
| |
| private String user; |
| private String album; |
| private String photo; // null for albums |
| |
| public Photo(String user, String album, String photo, String text) { |
| super(Type.PHOTO, text); |
| this.user = user; |
| this.album = album; |
| this.photo = photo; |
| } |
| |
| public String getUser() { return user; } |
| public String getAlbum() { return album; } |
| public String getPhoto() { return photo; } |
| |
| public boolean isHtml() { return false; } |
| public boolean isMedia() { return true; } |
| |
| /** Returns a Photo object if the given url is to a photo or album. */ |
| public static Photo matchURL(String url, String text) { |
| Matcher m = URL_PATTERN.matcher(url); |
| if (m.matches()) { |
| return new Photo(m.group(1), m.group(2), m.group(3), text); |
| } else { |
| return null; |
| } |
| } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getRssUrl(getUser())); |
| info.add(getAlbumURL(getUser(), getAlbum())); |
| if (getPhoto() != null) { |
| info.add(getPhotoURL(getUser(), getAlbum(), getPhoto())); |
| } else { |
| info.add((String)null); |
| } |
| return info; |
| } |
| |
| /** Returns the URL for the RSS description of the user's albums. */ |
| public static String getRssUrl(String user) { |
| return "http://picasaweb.google.com/data/feed/api/user/" + user + |
| "?category=album&alt=rss"; |
| } |
| |
| /** Returns the URL for an album. */ |
| public static String getAlbumURL(String user, String album) { |
| return "http://picasaweb.google.com/" + user + "/" + album; |
| } |
| |
| /** Returns the URL for a particular photo. */ |
| public static String getPhotoURL(String user, String album, String photo) { |
| return "http://picasaweb.google.com/" + user + "/" + album + "/photo#" |
| + photo; |
| } |
| } |
| |
| /** Represents a link to a Flickr photo or album. */ |
| public static class FlickrPhoto extends Token { |
| /** Pattern for a user album or photo URL. */ |
| private static final Pattern URL_PATTERN = Pattern.compile( |
| "http://(?:www.)?flickr.com/photos/([^/?#&]+)/?([^/?#&]+)?/?.*"); |
| private static final Pattern GROUPING_PATTERN = Pattern.compile( |
| "http://(?:www.)?flickr.com/photos/([^/?#&]+)/(tags|sets)/" + |
| "([^/?#&]+)/?"); |
| |
| private static final String SETS = "sets"; |
| private static final String TAGS = "tags"; |
| |
| private String user; |
| private String photo; // null for user album |
| private String grouping; // either "tags" or "sets" |
| private String groupingId; // sets or tags identifier |
| |
/**
 * Creates a Flickr token.
 *
 * System wide tags look like the URL to a Flickr user named "tags"; in
 * that case no user or photo is recorded and the {@code photo} argument
 * is taken as the tag identifier.
 *
 * @param user the Flickr user
 * @param photo the photo, or null for a user album
 * @param grouping either "tags" or "sets", or null
 * @param groupingId the sets or tags identifier, or null
 * @param text the text of the link as it appeared in the input
 */
public FlickrPhoto(String user, String photo, String grouping,
        String groupingId, String text) {
    super(Type.FLICKR, text);

    if (TAGS.equals(user)) {
        // System wide tag URL.
        this.user = null;
        this.photo = null;
        this.grouping = TAGS;
        this.groupingId = photo;
        return;
    }

    this.user = user;
    // Don't consider a slide show URL a photo.
    this.photo = "show".equals(photo) ? null : photo;
    this.grouping = grouping;
    this.groupingId = groupingId;
}
| |
| public String getUser() { return user; } |
| public String getPhoto() { return photo; } |
| public String getGrouping() { return grouping; } |
| public String getGroupingId() { return groupingId; } |
| |
| public boolean isHtml() { return false; } |
| public boolean isMedia() { return true; } |
| |
| /** |
| * Returns a FlickrPhoto object if the given url is to a photo or Flickr |
| * user. |
| */ |
| public static FlickrPhoto matchURL(String url, String text) { |
| Matcher m = GROUPING_PATTERN.matcher(url); |
| if (m.matches()) { |
| return new FlickrPhoto(m.group(1), null, m.group(2), m.group(3), text); |
| } |
| |
| m = URL_PATTERN.matcher(url); |
| if (m.matches()) { |
| return new FlickrPhoto(m.group(1), m.group(2), null, null, text); |
| } else { |
| return null; |
| } |
| } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getUrl()); |
| info.add(getUser() != null ? getUser() : ""); |
| info.add(getPhoto() != null ? getPhoto() : ""); |
| info.add(getGrouping() != null ? getGrouping() : ""); |
| info.add(getGroupingId() != null ? getGroupingId() : ""); |
| return info; |
| } |
| |
| public String getUrl() { |
| if (SETS.equals(grouping)) { |
| return getUserSetsURL(user, groupingId); |
| } else if (TAGS.equals(grouping)) { |
| if (user != null) { |
| return getUserTagsURL(user, groupingId); |
| } else { |
| return getTagsURL(groupingId); |
| } |
| } else if (photo != null) { |
| return getPhotoURL(user, photo); |
| } else { |
| return getUserURL(user); |
| } |
| } |
| |
| /** Returns the URL for the RSS description. */ |
| public static String getRssUrl(String user) { |
| return null; |
| } |
| |
| /** Returns the URL for a particular tag. */ |
| public static String getTagsURL(String tag) { |
| return "http://flickr.com/photos/tags/" + tag; |
| } |
| |
| /** Returns the URL to the user's Flickr homepage. */ |
| public static String getUserURL(String user) { |
| return "http://flickr.com/photos/" + user; |
| } |
| |
| /** Returns the URL for a particular photo. */ |
| public static String getPhotoURL(String user, String photo) { |
| return "http://flickr.com/photos/" + user + "/" + photo; |
| } |
| |
| /** Returns the URL for a user tag photo set. */ |
| public static String getUserTagsURL(String user, String tagId) { |
| return "http://flickr.com/photos/" + user + "/tags/" + tagId; |
| } |
| |
| /** Returns the URL for user set. */ |
| public static String getUserSetsURL(String user, String setId) { |
| return "http://flickr.com/photos/" + user + "/sets/" + setId; |
| } |
| } |
| |
| /** Represents a smiley that was found in the input. */ |
| public static class Smiley extends Token { |
| // TODO: Pass the SWF URL down to the client. |
| |
| public Smiley(String text) { |
| super(Type.SMILEY, text); |
| } |
| |
| public boolean isHtml() { return false; } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getRawText()); |
| return info; |
| } |
| } |
| |
| /** Represents an acronym that was found in the input. */ |
| public static class Acronym extends Token { |
| private String value; |
| // TODO: SWF |
| |
| public Acronym(String text, String value) { |
| super(Type.ACRONYM, text); |
| this.value = value; |
| } |
| |
| public String getValue() { return value; } |
| |
| public boolean isHtml() { return false; } |
| |
| public List<String> getInfo() { |
| List<String> info = super.getInfo(); |
| info.add(getRawText()); |
| info.add(getValue()); |
| return info; |
| } |
| } |
| |
| /** Represents a character that changes formatting. */ |
| public static class Format extends Token { |
| private char ch; |
| private boolean start; |
| private boolean matched; |
| |
| public Format(char ch, boolean start) { |
| super(Type.FORMAT, String.valueOf(ch)); |
| this.ch = ch; |
| this.start = start; |
| } |
| |
| public void setMatched(boolean matched) { this.matched = matched; } |
| |
| public boolean isHtml() { return true; } |
| |
| public String toHtml(boolean caps) { |
| // This character only implies special formatting if it was matched. |
| // Otherwise, it was just a plain old character. |
| if (matched) { |
| return start ? getFormatStart(ch) : getFormatEnd(ch); |
| } else { |
| // We have to make sure we escape HTML characters as usual. |
| return (ch == '"') ? """ : String.valueOf(ch); |
| } |
| } |
| |
| /** |
| * Not supported. Info should not be needed for this type |
| */ |
| public List<String> getInfo() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| public boolean controlCaps() { return (ch == '^'); } |
| public boolean setCaps() { return start; } |
| |
| private String getFormatStart(char ch) { |
| switch (ch) { |
| case '*': return "<b>"; |
| case '_': return "<i>"; |
| case '^': return "<b><font color=\"#005FFF\">"; // TODO: all caps |
| case '"': return "<font color=\"#999999\">\u201c"; |
| default: throw new AssertionError("unknown format '" + ch + "'"); |
| } |
| } |
| |
| private String getFormatEnd(char ch) { |
| switch (ch) { |
| case '*': return "</b>"; |
| case '_': return "</i>"; |
| case '^': return "</font></b>"; // TODO: all caps |
| case '"': return "\u201d</font>"; |
| default: throw new AssertionError("unknown format '" + ch + "'"); |
| } |
| } |
| } |
| |
| /** Adds the given token to the parsed output. */ |
| private void addToken(Token token) { |
| tokens.add(token); |
| } |
| |
| /** Converts the entire message into a single HTML display string. */ |
| public String toHtml() { |
| StringBuilder html = new StringBuilder(); |
| |
| for (Part part : parts) { |
| boolean caps = false; |
| |
| html.append("<p>"); |
| for (Token token : part.getTokens()) { |
| if (token.isHtml()) { |
| html.append(token.toHtml(caps)); |
| } else { |
| switch (token.getType()) { |
| case LINK: |
| html.append("<a href=\""); |
| html.append(((Link)token).getURL()); |
| html.append("\">"); |
| html.append(token.getRawText()); |
| html.append("</a>"); |
| break; |
| |
| case SMILEY: |
| // TODO: link to an appropriate image |
| html.append(token.getRawText()); |
| break; |
| |
| case ACRONYM: |
| html.append(token.getRawText()); |
| break; |
| |
| case MUSIC: |
| // TODO: include a music glyph |
| html.append(((MusicTrack)token).getTrack()); |
| break; |
| |
| case GOOGLE_VIDEO: |
| // TODO: include a Google Video icon |
| html.append("<a href=\""); |
| html.append(((Video)token).getURL(((Video)token).getDocID())); |
| html.append("\">"); |
| html.append(token.getRawText()); |
| html.append("</a>"); |
| break; |
| |
| case YOUTUBE_VIDEO: |
| // TODO: include a YouTube icon |
| html.append("<a href=\""); |
| html.append(((YouTubeVideo)token).getURL( |
| ((YouTubeVideo)token).getDocID())); |
| html.append("\">"); |
| html.append(token.getRawText()); |
| html.append("</a>"); |
| break; |
| |
| case PHOTO: { |
| // TODO: include a Picasa Web icon |
| html.append("<a href=\""); |
| html.append(Photo.getAlbumURL( |
| ((Photo)token).getUser(), ((Photo)token).getAlbum())); |
| html.append("\">"); |
| html.append(token.getRawText()); |
| html.append("</a>"); |
| break; |
| } |
| |
| case FLICKR: |
| // TODO: include a Flickr icon |
| Photo p = (Photo) token; |
| html.append("<a href=\""); |
| html.append(((FlickrPhoto)token).getUrl()); |
| html.append("\">"); |
| html.append(token.getRawText()); |
| html.append("</a>"); |
| break; |
| |
| default: |
| throw new AssertionError("unknown token type: " + token.getType()); |
| } |
| } |
| |
| if (token.controlCaps()) { |
| caps = token.setCaps(); |
| } |
| } |
| html.append("</p>\n"); |
| } |
| |
| return html.toString(); |
| } |
| |
| /** Returns the reverse of the given string. */ |
| protected static String reverse(String str) { |
| StringBuilder buf = new StringBuilder(); |
| for (int i = str.length() - 1; i >= 0; --i) { |
| buf.append(str.charAt(i)); |
| } |
| return buf.toString(); |
| } |
| |
| public static class TrieNode { |
| private final HashMap<Character,TrieNode> children = |
| new HashMap<Character,TrieNode>(); |
| private String text; |
| private String value; |
| |
| public TrieNode() { this(""); } |
| public TrieNode(String text) { |
| this.text = text; |
| } |
| |
| public final boolean exists() { return value != null; } |
| public final String getText() { return text; } |
| public final String getValue() { return value; } |
| public void setValue(String value) { this.value = value; } |
| |
| public TrieNode getChild(char ch) { |
| return children.get(Character.valueOf(ch)); |
| } |
| |
| public TrieNode getOrCreateChild(char ch) { |
| Character key = Character.valueOf(ch); |
| TrieNode node = children.get(key); |
| if (node == null) { |
| node = new TrieNode(text + String.valueOf(ch)); |
| children.put(key, node); |
| } |
| return node; |
| } |
| |
| /** Adds the given string into the trie. */ |
| public static void addToTrie(TrieNode root, String str, String value) { |
| int index = 0; |
| while (index < str.length()) { |
| root = root.getOrCreateChild(str.charAt(index++)); |
| } |
| root.setValue(value); |
| } |
| } |
| |
| |
| |
| /** Determines whether the given string is in the given trie. */ |
| private static boolean matches(TrieNode root, String str) { |
| int index = 0; |
| while (index < str.length()) { |
| root = root.getChild(str.charAt(index++)); |
| if (root == null) { |
| break; |
| } else if (root.exists()) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Returns the longest substring of the given string, starting at the given |
| * index, that exists in the trie. |
| */ |
| private static TrieNode longestMatch( |
| TrieNode root, AbstractMessageParser p, int start) { |
| return longestMatch(root, p, start, false); |
| } |
| |
| /** |
| * Returns the longest substring of the given string, starting at the given |
| * index, that exists in the trie, with a special tokenizing case for |
| * smileys if specified. |
| */ |
| private static TrieNode longestMatch( |
| TrieNode root, AbstractMessageParser p, int start, boolean smiley) { |
| int index = start; |
| TrieNode bestMatch = null; |
| while (index < p.getRawText().length()) { |
| root = root.getChild(p.getRawText().charAt(index++)); |
| if (root == null) { |
| break; |
| } else if (root.exists()) { |
| if (p.isWordBreak(index)) { |
| bestMatch = root; |
| } else if (smiley && p.isSmileyBreak(index)) { |
| bestMatch = root; |
| } |
| } |
| } |
| return bestMatch; |
| } |
| |
| |
| /** Represents set of tokens that are delivered as a single message. */ |
| public static class Part { |
| private String meText; |
| private ArrayList<Token> tokens; |
| |
| public Part() { |
| this.tokens = new ArrayList<Token>(); |
| } |
| |
| public String getType(boolean isSend) { |
| return (isSend ? "s" : "r") + getPartType(); |
| } |
| |
| private String getPartType() { |
| if (isMedia()) { |
| return "d"; |
| } else if (meText != null) { |
| return "m"; |
| } else { |
| return ""; |
| } |
| } |
| |
| public boolean isMedia() { |
| return (tokens.size() == 1) && tokens.get(0).isMedia(); |
| } |
| /** |
| * Convenience method for getting the Token of a Part that represents |
| * a media Token. Parts of this kind will always only have a single Token |
| * |
| * @return if this.isMedia(), |
| * returns the Token representing the media contained in this Part, |
| * otherwise returns null; |
| */ |
| public Token getMediaToken() { |
| if(isMedia()) { |
| return tokens.get(0); |
| } |
| return null; |
| } |
| |
| /** Adds the given token to this part. */ |
| public void add(Token token) { |
| if (isMedia()) { |
| throw new AssertionError("media "); |
| } |
| tokens.add(token); |
| } |
| |
| public void setMeText(String meText) { |
| this.meText = meText; |
| } |
| |
| /** Returns the original text of this part. */ |
| public String getRawText() { |
| StringBuilder buf = new StringBuilder(); |
| if (meText != null) { |
| buf.append(meText); |
| } |
| for (int i = 0; i < tokens.size(); ++i) { |
| buf.append(tokens.get(i).getRawText()); |
| } |
| return buf.toString(); |
| } |
| |
| /** Returns the tokens in this part. */ |
| public ArrayList<Token> getTokens() { return tokens; } |
| |
| /** Adds the tokens into the given builder as an array. */ |
| // public void toArray(JSArrayBuilder array) { |
| // if (isMedia()) { |
| // // For media, we send its array (i.e., we don't wrap this in another |
| // // array as we do for non-media parts). |
| // tokens.get(0).toArray(array); |
| // } else { |
| // array.beginArray(); |
| // addToArray(array); |
| // array.endArray(); |
| // } |
| // } |
| } |
| } |