/*
 * Copyright (C) 2013 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.doclava;

import java.io.*;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;

import com.google.clearsilver.jsilver.data.Data;

import org.ccil.cowan.tagsoup.*;
import org.xml.sax.XMLReader;
import org.xml.sax.InputSource;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

/**
 * Metadata associated with a specific documentation page. Extracts
 * metadata based on the page's declared hdf vars (meta.tags and others)
 * as well as implicit data relating to the page, such as url, type, etc.
 * Includes a Node class that represents the metadata and lets it attach
 * to parent/child elements in the tree of metadata nodes for all pages.
 * Node also includes methods for rendering the node tree to a json file
 * in docs output, which is then used by JavaScript to load metadata
 * objects into html pages.
 */

public class PageMetadata {
  File mSource;
  String mDest;
  String mTagList;
  static boolean sLowercaseTags = true;
  static boolean sLowercaseKeywords = true;
  //static String linkPrefix = (Doclava.META_DBG) ? "/" : "http://developer.android.com/";
  /**
   * regex pattern to match javadoc @link and similar tags. Extracts
   * root symbol to $1.
   */
  private static final Pattern JD_TAG_PATTERN =
      Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
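  // Illustrative sketch (not part of the original source): given the pattern
  // above, an inline javadoc tag should reduce to its root symbol when the
  // matcher's replacement is "$1", as done later in getSummaryNormalized():
  //   String s = JD_TAG_PATTERN.matcher("See {@link android.app.Activity} for details.")
  //       .replaceAll("$1");
  //   // s -> "See Activity for details."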

  public PageMetadata(File source, String dest, List<Node> taglist) {
    mSource = source;
    mDest = dest;

    if (dest != null) {
      int len = dest.length();
      if (len > 1 && dest.charAt(len - 1) != '/') {
        mDest = dest + '/';
      } else {
        mDest = dest;
      }
    }
  }

  /**
   * Given a list of metadata nodes organized by type, sort the
   * root nodes by type name and render the types and their child
   * metadata nodes to a json file in the out dir.
   *
   * @param rootTypeNodesList A list of root metadata nodes, each
   *        representing a type and its member child pages.
   */
  public static void WriteList(List<Node> rootTypeNodesList) {

    Collections.sort(rootTypeNodesList, BY_TYPE_NAME);
    Node pageMeta = new Node.Builder().setLabel("TOP").setChildren(rootTypeNodesList).build();

    StringBuilder buf = new StringBuilder();
    // write the taglist to string format
    pageMeta.renderTypeResources(buf);
    pageMeta.renderTypesByTag(buf);
    // write the taglist to js file
    Data data = Doclava.makeHDF();
    data.setValue("reference_tree", buf.toString());
    ClearPage.write(data, "jd_lists_unified.cs", "jd_lists_unified.js");
  }
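  // Rough sketch (not part of the original source) of the shape of the data the
  // render methods below write into jd_lists_unified.js, assuming pages of type
  // "develop"; all field values here are invented for illustration:
  //
  //   var DEVELOP_RESOURCES = [
  //     {
  //       "title":"Fragments",
  //       "titleFriendly":"",
  //       "summary":"A Fragment represents a portion of user interface...",
  //       "url":"guide/components/fragments.html",
  //       "group":"",
  //       "keywords": ["fragments"],
  //       "tags": ["ui"],
  //       "image":"images/fragment_cont_mode.png",
  //       "lang":"en",
  //       "type":"develop"
  //     }, ...
  //   ];
  //
  //   var DEVELOP_BY_TAG = {
  //    ui:[0,1], ...
  //   };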

  /**
   * Extract supported metadata values from a page and add them as
   * a child node of a root node based on type. Some metadata values
   * are normalized. Unsupported metadata fields are ignored. See
   * Node for supported metadata fields and methods for accessing values.
   *
   * @param docfile The file from which to extract metadata.
   * @param dest The output path for the file, used to set link to page.
   * @param filename The file path, used to set the page link and lang.
   * @param hdf Data object in which to store the metadata values.
   * @param tagList The list of root metadata nodes, organized by type, to
   *        which this page's metadata node is added.
   */
  public static void setPageMetadata(String docfile, String dest, String filename,
      Data hdf, List<Node> tagList) {
    //exclude this page if author does not want it included
    boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));

    //check whether summary and image exist and if not, get them from itemprop/markup
    Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
    Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
    if ((needsSummary) || (needsImage)) {
      //try to extract the metadata from itemprop and markup
      inferMetadata(docfile, hdf, needsSummary, needsImage);
    }

    //extract available metadata and set it in a node
    if (!excludeNode) {
      Node pageMeta = new Node.Builder().build();
      pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
      pageMeta.setTitleFriendly(hdf.getValue("page.titleFriendly",""));
      pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
      pageMeta.setLink(getPageUrlNormalized(filename));
      pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
      pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
      pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
      pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
      pageMeta.setLang(getLangStringNormalized(filename));
      pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
      appendMetaNodeByType(pageMeta, tagList);
    }
  }

  /**
   * Attempt to infer page metadata based on the contents of the
   * file. Load and parse the file as a dom tree. Select values
   * in this order: 1. dom node specifically tagged with
   * microdata (itemprop). 2. first qualified p or img node.
   *
   * @param docfile The file from which to extract metadata.
   * @param hdf Data object in which to store the metadata values.
   * @param needsSummary Whether to extract summary metadata.
   * @param needsImage Whether to extract image metadata.
   */
  public static void inferMetadata(String docfile, Data hdf,
      Boolean needsSummary, Boolean needsImage) {
    String sum = "";
    String imageUrl = "";
    String sumFrom = needsSummary ? "none" : "hdf";
    String imgFrom = needsImage ? "none" : "hdf";
    String filedata = hdf.getValue("commentText", "");
    if (Doclava.META_DBG) System.out.println("----- " + docfile + "\n");

    try {
      XPathFactory xpathFac = XPathFactory.newInstance();
      XPath xpath = xpathFac.newXPath();
      InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
      XMLReader reader = new Parser();
      reader.setFeature(Parser.namespacesFeature, false);
      reader.setFeature(Parser.namespacePrefixesFeature, false);
      reader.setFeature(Parser.ignoreBogonsFeature, true);

      Transformer transformer = TransformerFactory.newInstance().newTransformer();
      DOMResult result = new DOMResult();
      transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
      org.w3c.dom.Node htmlNode = result.getNode();

      if (needsSummary) {
        StringBuilder sumStrings = new StringBuilder();
        XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
            + "[@itemprop='description'][1]//text()[string(.)]");
        org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
            XPathConstants.NODESET);
        if (nodes.getLength() > 0) {
          for (int i = 0; i < nodes.getLength(); i++) {
            String tx = nodes.item(i).getNodeValue();
            sumStrings.append(tx);
            sumFrom = "itemprop";
          }
        } else {
          XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
              + "@class='notice-developers') and not(../@class='sidebox')"
              + "and not(@class)]//text()");
          nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
          if (nodes.getLength() > 0) {
            for (int i = 0; i < nodes.getLength(); i++) {
              String tx = nodes.item(i).getNodeValue();
              sumStrings.append(tx + " ");
              sumFrom = "markup";
            }
          }
        }
        //found a summary string, now normalize it
        sum = sumStrings.toString().trim();
        if ((sum != null) && (!"".equals(sum))) {
          sum = getSummaryNormalized(sum);
        }
        //normalized summary ended up being too short to be meaningful
        if ("".equals(sum)) {
          if (Doclava.META_DBG) System.out.println("Warning: description too short! ("
              + sum.length() + "chars) ...\n\n");
        }
        //summary looks good, store it to the file hdf data
        hdf.setValue("page.metaDescription", sum);
      }
      if (needsImage) {
        XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
        org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
            XPathConstants.NODESET);
        if (imgNodes.getLength() > 0) {
          imageUrl = imgNodes.item(0).getNodeValue();
          imgFrom = "itemprop";
        } else {
          XPathExpression FirstImgExpr = xpath.compile("//img/@src");
          imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
          if (imgNodes.getLength() > 0) {
            //iterate nodes looking for valid image url and normalize.
            for (int i = 0; i < imgNodes.getLength(); i++) {
              String tx = imgNodes.item(i).getNodeValue();
              //qualify and normalize the image
              imageUrl = getImageUrlNormalized(tx);
              //this img src did not qualify, keep looking...
              if ("".equals(imageUrl)) {
                if (Doclava.META_DBG) System.out.println(" >>>>> Discarded image: " + tx);
                continue;
              } else {
                imgFrom = "markup";
                break;
              }
            }
          }
        }
        //img src url looks good, store it to the file hdf data
        hdf.setValue("page.image", imageUrl);
      }
      if (Doclava.META_DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
      if (Doclava.META_DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length()
          + " chars\n\n" + sum + "\n");
      return;

    } catch (Exception e) {
      if (Doclava.META_DBG) System.out.println(" >>>>> Exception: " + e + "\n");
    }
  }

  /**
   * Normalize a comma-delimited, multi-string value. Split on commas
   * (or ideographic commas), remove quotes, trim whitespace, optionally
   * make keywords/tags lowercase for easier matching.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata was extracted.
   * @return A normalized string value for the specified tag.
   */
  public static String getPageTagsNormalized(Data hdf, String tag) {

    StringBuilder tags = new StringBuilder();
    String tagList = hdf.getValue(tag, "");
    if (tag.equals("meta.tags") && (tagList.equals(""))) {
      //use keywords as tags if no meta tags are available
      tagList = hdf.getValue("page.tags", "");
    }
    if (!tagList.equals("")) {
      tagList = tagList.replaceAll("\"", "");

      String[] tagParts = tagList.split("[,\u3001]");
      for (int iter = 0; iter < tagParts.length; iter++) {
        tags.append("\"");
        if (tag.equals("meta.tags") && sLowercaseTags) {
          tagParts[iter] = tagParts[iter].toLowerCase();
        } else if (tag.equals("page.tags") && sLowercaseKeywords) {
          tagParts[iter] = tagParts[iter].toLowerCase();
        }
        if (tag.equals("meta.tags")) {
          //tags.append("#"); //to match hashtag format used with yt/blogger resources
          tagParts[iter] = tagParts[iter].replaceAll(" ","");
        }
        tags.append(tagParts[iter].trim());
        tags.append("\"");
        if (iter < tagParts.length - 1) {
          tags.append(",");
        }
      }
    }
    //write this back to hdf to expose through js
    if (tag.equals("meta.tags")) {
      hdf.setValue(tag, tags.toString());
    }
    return tags.toString();
  }
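  // Illustrative sketch (not part of the original source): with sLowercaseTags
  // enabled, a meta.tags value such as
  //     Android Studio, NDK, "Games"
  // should normalize to the quoted, comma-separated string
  //     "androidstudio","ndk","games"
  // because quotes are stripped, each part is lowercased, spaces are removed
  // for meta.tags, and every part is re-quoted before being joined.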

  /**
   * Normalize a string for which only a single value is supported.
   * Extract the string up to the first comma, remove quotes, remove
   * any forward-slash prefix, trim any whitespace, optionally make
   * lowercase for easier matching.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata should be extracted.
   * @return A normalized string value for the specified tag.
   */
  public static String getStringValueNormalized(Data hdf, String tag) {
    StringBuilder outString = new StringBuilder();
    String tagList = hdf.getValue(tag, "");
    // String.replaceAll returns a new string; assign it back so the quotes
    // are actually removed
    tagList = tagList.replaceAll("\"", "");
    if (!tagList.isEmpty()) {
      int end = tagList.indexOf(",");
      if (end != -1) {
        tagList = tagList.substring(0,end);
      }
      tagList = tagList.startsWith("/") ? tagList.substring(1) : tagList;
      if ("sample.group".equals(tag) && sLowercaseTags) {
        tagList = tagList.toLowerCase();
      }
      outString.append(tagList.trim());
    }
    return outString.toString();
  }

  /**
   * Normalize a page title. Extract the string, remove quotes, remove
   * markup, and trim any whitespace.
   *
   * @param hdf Data object in which the metadata values are stored.
   * @param tag The hdf var from which the metadata should be extracted.
   * @return A normalized string value for the specified tag.
   */
  public static String getTitleNormalized(Data hdf, String tag) {
    StringBuilder outTitle = new StringBuilder();
    String title = hdf.getValue(tag, "");
    if (!title.isEmpty()) {
      // strip any <span ...>...</span> markup before escaping; otherwise the
      // escaped "&lt;span" would never match
      if (title.indexOf("<span") != -1) {
        String[] splitTitle = title.split("<span(.*?)</span>");
        title = splitTitle[0];
        for (int j = 1; j < splitTitle.length; j++) {
          // String.concat returns a new string; assign it back so the
          // remaining title fragments are not silently dropped
          title = title.concat(splitTitle[j]);
        }
      }
      title = escapeString(title);
      outTitle.append(title.trim());
    }
    return outTitle.toString();
  }
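  // Illustrative sketch (not part of the original source): a page.title value
  // such as
  //     Support Library <span class="more">(revision history)</span>
  // should normalize to
  //     Support Library
  // because the span markup (and its contents) is split out before the
  // remaining text is escaped and trimmed.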

  /**
   * Extract and normalize a page's language string based on the
   * lowercased dir path. Non-supported langs are ignored and assigned
   * the default lang string of "en".
   *
   * @param filename A path string to the file relative to root.
   * @return A normalized lang value.
   */
  public static String getLangStringNormalized(String filename) {
    String[] stripStr = filename.toLowerCase().split("\\/");
    String outFrag = "en";
    // a localized page lives under intl/<lang>/..., so require at least two
    // path segments before reading the lang segment
    if (stripStr.length > 1) {
      for (String t : DocFile.DEVSITE_VALID_LANGS) {
        if ("intl".equals(stripStr[0])) {
          if (t.equals(stripStr[1])) {
            outFrag = stripStr[1];
            break;
          }
        }
      }
    }
    return outFrag;
  }
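  // Illustrative sketch (not part of the original source): assuming "ja" is in
  // DocFile.DEVSITE_VALID_LANGS, a path such as
  //     intl/ja/training/basics/firstapp/index.jd
  // should yield "ja", while a non-localized path such as
  //     training/basics/firstapp/index.jd
  // falls back to the default "en".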

  /**
   * Normalize a page summary string and truncate as needed. Strings
   * exceeding max_chars are truncated at the first word boundary
   * following the max_chars marker. Strings shorter than min_chars
   * are discarded (as they are assumed to provide too little context).
   *
   * @param s String extracted from the page as its summary.
   * @return A normalized string value.
   */
  public static String getSummaryNormalized(String s) {
    String str = "";
    int max_chars = 250;
    int min_chars = 50;
    int marker = 0;
    if (s.length() < min_chars) {
      return str;
    } else {
      str = s.replaceAll("^\"|\"$", "");
      str = str.replaceAll("\\s+", " ");
      str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
      str = escapeString(str);
      BreakIterator bi = BreakIterator.getWordInstance();
      bi.setText(str);
      if (str.length() > max_chars) {
        marker = bi.following(max_chars);
      } else {
        marker = bi.last();
      }
      str = str.substring(0, marker);
      str = str.concat("\u2026");
    }
    return str;
  }
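  // Illustrative sketch (not part of the original source): a 400-character
  // summary is cut at the first word boundary after character 250 and suffixed
  // with an ellipsis; a 40-character summary returns "" and is treated upstream
  // by inferMetadata() as too short; a 100-character summary is kept whole, with
  // collapsed whitespace, escaped markup characters, and a trailing ellipsis.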

  public static String escapeString(String s) {
    s = s.replaceAll("\"", "&quot;");
    s = s.replaceAll("\'", "&#39;");
    s = s.replaceAll("<", "&lt;");
    s = s.replaceAll(">", "&gt;");
    s = s.replaceAll("/", "&#47;");
    return s;
  }

  //Disqualify img src urls that include these substrings
  public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
      "icon_play.png", "robot-tiny"};

  public static boolean inList(String s, String[] list) {
    for (String t : list) {
      if (s.contains(t)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Normalize an img src url by removing docRoot and leading
   * slash for local image references. These are added later
   * in js to support offline mode and keep path reference
   * format consistent with hrefs.
   *
   * @param url Abs or rel url sourced from img src.
   * @return Normalized url if qualified, else empty
   */
  public static String getImageUrlNormalized(String url) {
    String absUrl = "";
    // validate to avoid using empty or excluded image urls
    if ((url != null) && (!url.equals("")) && (!inList(url, IMAGE_EXCLUDE))) {
      absUrl = url.replace("{@docRoot}", "");
      absUrl = absUrl.replaceFirst("^/(?!/)", "");
    }
    return absUrl;
  }
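  // Illustrative sketch (not part of the original source); example urls invented:
  //     {@docRoot}images/tools/studio-logo.png  ->  images/tools/studio-logo.png
  //     /images/home/hero.jpg                   ->  images/home/hero.jpg
  //     //developer.android.com/images/x.png    ->  //developer.android.com/images/x.png
  //     images/icon_play.png                    ->  "" (matches IMAGE_EXCLUDE)
  // The protocol-relative url keeps its leading slashes because "^/(?!/)" only
  // strips a single leading slash that is not followed by another slash.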

  /**
   * Normalize an href url by removing docRoot and leading
   * slash for local page references. These are added later
   * in js to support offline mode and keep path reference
   * format consistent with hrefs.
   *
   * @param url Abs or rel page url sourced from href.
   * @return Normalized url, either abs or rel to root.
   */
  public static String getPageUrlNormalized(String url) {
    String absUrl = "";
    if ((url != null) && (!url.equals(""))) {
      absUrl = url.replace("{@docRoot}", "");
      absUrl = absUrl.replaceFirst("^/(?!/)", "");
    }
    return absUrl;
  }

  /**
   * Given a metadata node, add it as a child of a root node based on its
   * type. If there is no root node that matches the node's type, create one
   * and add the metadata node as a child node.
   *
   * @param gNode The node to attach to a root node or add as a new root node.
   * @param rootList The current list of root nodes.
   * @return The updated list of root nodes.
   */
  public static List<Node> appendMetaNodeByType(Node gNode, List<Node> rootList) {

    String nodeTags = gNode.getType();
    boolean matched = false;
    for (Node n : rootList) {
      if (n.getType().equals(nodeTags)) { //find any matching type node
        n.getChildren().add(gNode);
        matched = true;
        break; // add to the first root node only
      } // tag did not match
    } // end rootnodes matching iterator
    if (!matched) {
      List<Node> mtaglist = new ArrayList<Node>(); // list of file objects that have a given type
      mtaglist.add(gNode);
      Node tnode = new Node.Builder().setChildren(mtaglist).setType(nodeTags).build();
      rootList.add(tnode);
    }
    return rootList;
  }

  /**
   * Given a metadata node, add it as a child of a root node based on its
   * tag. If there is no root node matching the tag, create one for it
   * and add the metadata node as a child node.
   *
   * @param gNode The node to attach to a root node or add as a new root node.
   * @param rootTagNodesList The current list of root nodes.
   * @return The updated list of root nodes.
   */
  public static List<Node> appendMetaNodeByTagIndex(Node gNode, List<Node> rootTagNodesList) {

    for (int iter = 0; iter < gNode.getChildren().size(); iter++) {
      if (gNode.getChildren().get(iter).getTags() != null) {
        List<String> nodeTags = gNode.getChildren().get(iter).getTags();
        for (String t : nodeTags) { //process each of the meta.tags
          // reset the match flag per tag so that each tag of this child gets
          // its own root node if one does not exist yet
          boolean matched = false;
          for (Node n : rootTagNodesList) {
            if (n.getLabel().equals(t.toString())) {
              n.getTags().add(String.valueOf(iter));
              matched = true;
              break; // add to the first root node only
            } // tag did not match
          } // end rootnodes matching iterator
          if (!matched) {
            List<String> mtaglist = new ArrayList<String>(); // list of objects with a given tag
            mtaglist.add(String.valueOf(iter));
            Node tnode = new Node.Builder().setLabel(t.toString()).setTags(mtaglist).build();
            rootTagNodesList.add(tnode);
          }
        }
      }
    }
    return rootTagNodesList;
  }

  public static final Comparator<Node> BY_TAG_NAME = new Comparator<Node>() {
    public int compare(Node one, Node other) {
      return one.getLabel().compareTo(other.getLabel());
    }
  };

  public static final Comparator<Node> BY_TYPE_NAME = new Comparator<Node>() {
    public int compare(Node one, Node other) {
      return one.getType().compareTo(other.getType());
    }
  };

  /**
   * A node for storing page metadata. Use Builder.build() to instantiate.
   */
  public static class Node {

    private String mLabel; // holds page.title or similar identifier
    private String mTitleFriendly; // title for card or similar use
    private String mSummary; // Summary for card or similar use
    private String mLink; //link href for item click
    private String mGroup; // from sample.group in _index.jd
    private List<String> mKeywords; // from page.tags
    private List<String> mTags; // from meta.tags
    private String mImage; // holds an href, fully qualified or relative to root
    private List<Node> mChildren;
    private String mLang;
    private String mType; // can be file, dir, video show, announcement, etc.

    private Node(Builder builder) {
      mLabel = builder.mLabel;
      mTitleFriendly = builder.mTitleFriendly;
      mSummary = builder.mSummary;
      mLink = builder.mLink;
      mGroup = builder.mGroup;
      mKeywords = builder.mKeywords;
      mTags = builder.mTags;
      mImage = builder.mImage;
      mChildren = builder.mChildren;
      mLang = builder.mLang;
      mType = builder.mType;
    }

    private static class Builder {
      private String mLabel, mTitleFriendly, mSummary, mLink, mGroup, mImage, mLang, mType;
      private List<String> mKeywords = null;
      private List<String> mTags = null;
      private List<Node> mChildren = null;
      public Builder setLabel(String mLabel) { this.mLabel = mLabel; return this;}
      public Builder setTitleFriendly(String mTitleFriendly) {
        this.mTitleFriendly = mTitleFriendly; return this;
      }
      public Builder setSummary(String mSummary) {this.mSummary = mSummary; return this;}
      public Builder setLink(String mLink) {this.mLink = mLink; return this;}
      public Builder setGroup(String mGroup) {this.mGroup = mGroup; return this;}
      public Builder setKeywords(List<String> mKeywords) {
        this.mKeywords = mKeywords; return this;
      }
      public Builder setTags(List<String> mTags) {this.mTags = mTags; return this;}
      public Builder setImage(String mImage) {this.mImage = mImage; return this;}
      public Builder setChildren(List<Node> mChildren) {this.mChildren = mChildren; return this;}
      public Builder setLang(String mLang) {this.mLang = mLang; return this;}
      public Builder setType(String mType) {this.mType = mType; return this;}
      public Node build() {return new Node(this);}
    }

    /**
     * Render a tree of metadata nodes organized by type.
     * @param buf Output buffer to render to.
     */
    void renderTypeResources(StringBuilder buf) {
      List<Node> list = mChildren; //list of type rootnodes
      if (list == null || list.size() == 0) {
        buf.append("null");
      } else {
        final int n = list.size();
        for (int i = 0; i < n; i++) {
          buf.append("var " + list.get(i).mType.toUpperCase() + "_RESOURCES = [");
          list.get(i).renderTypes(buf); //render this type's children
          buf.append("\n];\n\n");
        }
      }
    }
    /**
     * Render all metadata nodes for a specific type.
     * @param buf Output buffer to render to.
     */
    void renderTypes(StringBuilder buf) {
      List<Node> list = mChildren;
      if (list == null || list.size() == 0) {
        buf.append("nulltype");
      } else {
        final int n = list.size();
        for (int i = 0; i < n; i++) {
          buf.append("\n {\n");
          buf.append(" \"title\":\"" + list.get(i).mLabel + "\",\n" );
          buf.append(" \"titleFriendly\":\"" + list.get(i).mTitleFriendly + "\",\n" );
          buf.append(" \"summary\":\"" + list.get(i).mSummary + "\",\n" );
          buf.append(" \"url\":\"" + list.get(i).mLink + "\",\n" );
          buf.append(" \"group\":\"" + list.get(i).mGroup + "\",\n" );
          list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
          list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
          buf.append(" \"image\":\"" + list.get(i).mImage + "\",\n" );
          buf.append(" \"lang\":\"" + list.get(i).mLang + "\",\n" );
          buf.append(" \"type\":\"" + list.get(i).mType + "\"");
          buf.append("\n }");
          if (i != n - 1) {
            buf.append(", ");
          }
        }
      }
    }

    /**
     * Build and render a list of tags associated with each type.
     * @param buf Output buffer to render to.
     */
    void renderTypesByTag(StringBuilder buf) {
      List<Node> list = mChildren; //list of rootnodes
      if (list == null || list.size() == 0) {
        buf.append("null");
      } else {
        final int n = list.size();
        for (int i = 0; i < n; i++) {
          buf.append("var " + list.get(i).mType.toUpperCase() + "_BY_TAG = {");
          List<Node> mTagList = new ArrayList<Node>(); //list of rootnodes
          mTagList = appendMetaNodeByTagIndex(list.get(i), mTagList);
          list.get(i).renderTagIndices(buf, mTagList);
          buf.append("\n};\n\n");
        }
      }
    }

    /**
     * Render a list of tags associated with a type, including the
     * tag's indices in the type array.
     * @param buf Output buffer to render to.
     * @param tagList Node tree of types to render.
     */
    void renderTagIndices(StringBuilder buf, List<Node> tagList) {
      List<Node> list = tagList;
      if (list == null || list.size() == 0) {
        buf.append("");
      } else {
        final int n = list.size();
        for (int i = 0; i < n; i++) {
          buf.append("\n " + list.get(i).mLabel + ":[");
          renderArrayValue(buf, list.get(i).mTags);
          buf.append("]");
          if (i != n - 1) {
            buf.append(", ");
          }
        }
      }
    }

    /**
     * Render key:arrayvalue pair.
     * @param buf Output buffer to render to.
     * @param type The list value to render as an arrayvalue.
     * @param key The key for the pair.
     */
    void renderArrayType(StringBuilder buf, List<String> type, String key) {
      buf.append(" \"" + key + "\": [");
      renderArrayValue(buf, type);
      buf.append("],\n");
    }

    /**
     * Render an array value to buf, with special handling of unicode characters.
     * @param buf Output buffer to render to.
     * @param type The list value to render as an arrayvalue.
     */
    void renderArrayValue(StringBuilder buf, List<String> type) {
      List<String> list = type;
      if (list != null) {
        final int n = list.size();
        for (int i = 0; i < n; i++) {
          String tagval = list.get(i).toString();
          final int L = tagval.length();
          for (int t = 0; t < L; t++) {
            char c = tagval.charAt(t);
            if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE ) {
              // we have a supplementary character encoded as a UTF-16 surrogate
              // pair; escape the full code point and skip past both chars
              int codePoint = tagval.codePointAt(t);
              int charSize = Character.charCount(codePoint);
              t += charSize - 1;
              buf.append(String.format("\\u%04x",codePoint));
            } else if (c >= ' ' && c <= '~' && c != '\\') {
              // printable ascii (other than backslash) passes through unescaped
              buf.append(c);
            } else {
              // escape everything else as a \\uXXXX sequence
              buf.append(String.format("\\u%04x", (int) c));
            }
          }
          if (i != n - 1) {
            buf.append(",");
          }
        }
      }
    }
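    // Illustrative sketch (not part of the original source) of the escaping
    // above: an ascii value such as ndk is copied through unchanged, a value
    // such as 性能 is emitted as \u6027\u80fd, and a supplementary character
    // such as U+1F600 is emitted as a single escape for its full code point
    // (\u1f600), which is longer than four hex digits and must be handled by
    // the consuming js accordingly.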

    public String getLabel() {
      return mLabel;
    }

    public void setLabel(String label) {
      mLabel = label;
    }

    public String getTitleFriendly() {
      return mTitleFriendly;
    }

    public void setTitleFriendly(String title) {
      mTitleFriendly = title;
    }

    public String getSummary() {
      return mSummary;
    }

    public void setSummary(String summary) {
      mSummary = summary;
    }

    public String getLink() {
      return mLink;
    }

    public void setLink(String ref) {
      mLink = ref;
    }

    public String getGroup() {
      return mGroup;
    }

    public void setGroup(String group) {
      mGroup = group;
    }

    public List<String> getTags() {
      return mTags;
    }

    public void setTags(String tags) {
      if ("".equals(tags)) {
        mTags = null;
      } else {
        List<String> tagList = new ArrayList<String>();
        String[] tagParts = tags.split(",");

        for (String t : tagParts) {
          tagList.add(t);
        }
        mTags = tagList;
      }
    }

    public List<String> getKeywords() {
      return mKeywords;
    }

    public void setKeywords(String keywords) {
      if ("".equals(keywords)) {
        mKeywords = null;
      } else {
        List<String> keywordList = new ArrayList<String>();
        String[] keywordParts = keywords.split(",");

        for (String k : keywordParts) {
          keywordList.add(k);
        }
        mKeywords = keywordList;
      }
    }

    public String getImage() {
      return mImage;
    }

    public void setImage(String ref) {
      mImage = ref;
    }

    public List<Node> getChildren() {
      return mChildren;
    }

    public void setChildren(List<Node> node) {
      mChildren = node;
    }

    public String getLang() {
      return mLang;
    }

    public void setLang(String lang) {
      mLang = lang;
    }

    public String getType() {
      return mType;
    }

    public void setType(String type) {
      mType = type;
    }
  }
}