Fix handling of double-byte chars for tags, keywords.
Change-Id: I03adc405d087eee5a8735f67525a79357ba6c3d5
diff --git a/src/com/google/doclava/NavTree.java b/src/com/google/doclava/NavTree.java
index cc4f43f..cbe9fee 100644
--- a/src/com/google/doclava/NavTree.java
+++ b/src/com/google/doclava/NavTree.java
@@ -184,7 +184,7 @@
buf.append("\\u");
for (int j = 0; i < 4; i++) {
char x = (char) (c & 0x000f);
- if (x > 10) {
+ if (x >= 10) {
x = (char) (x - 10 + 'a');
} else {
x = (char) (x + '0');
diff --git a/src/com/google/doclava/PageMetadata.java b/src/com/google/doclava/PageMetadata.java
index 360ae84..57f27cb 100644
--- a/src/com/google/doclava/PageMetadata.java
+++ b/src/com/google/doclava/PageMetadata.java
@@ -282,7 +282,8 @@
}
if (!tagList.equals("")) {
tagList = tagList.replaceAll("\"", "");
- String[] tagParts = tagList.split(",");
+
+ String[] tagParts = tagList.split("[,\u3001]");
for (int iter = 0; iter < tagParts.length; iter++) {
tags.append("\"");
if (tag.equals("meta.tags") && sLowercaseTags) {
@@ -724,20 +725,17 @@
final int L = tagval.length();
for (int t = 0; t < L; t++) {
char c = tagval.charAt(t);
- if (c >= ' ' && c <= '~' && c != '\\') {
+ if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
+ // c is a high surrogate, so it may begin a UTF-16 surrogate pair (one supplementary code point spanning two chars)
+ int codePoint = tagval.codePointAt(t);
+ int charSize = Character.charCount(codePoint);
+ t += charSize - 1;
+ buf.append(String.format("\\u%04x",codePoint));
+ } else if (c >= ' ' && c <= '~' && c != '\\') {
buf.append(c);
- } else {
- buf.append("\\u");
- for (int m = 0; m < 4; m++) {
- char x = (char) (c & 0x000f);
- if (x > 10) {
- x = (char) (x - 10 + 'a');
- } else {
- x = (char) (x + '0');
- }
- buf.append(x);
- c >>= 4;
- }
+ } else {
+ // everything else (control chars, backslash, chars above '~') is written as a four-hex-digit Unicode escape
+ buf.append(String.format("\\u%04x", (int) c));
}
}
if (i != n - 1) {