Revert "Infer document and sample metadata when generating docs."
This reverts commit 3cdd961719e41b26b129abbeefa644f09a8e449c.
Change-Id: I991368d53b29cbd308de5ffcfe4f7899b277ca64
diff --git a/src/com/google/doclava/DocFile.java b/src/com/google/doclava/DocFile.java
index c777bd9..a00bac5 100644
--- a/src/com/google/doclava/DocFile.java
+++ b/src/com/google/doclava/DocFile.java
@@ -72,7 +72,7 @@
}
return outFrag;
}
-
+
public static Data getPageMetadata (String docfile, Data hdf) {
//utility method for extracting metadata without generating file output.
if (hdf == null) {
@@ -230,9 +230,6 @@
} else if (filename.indexOf("compatibility") == 0) {
hdf.setValue("compatibility", "true");
}
- //set metadata for this file in jd_lists_unified
- PageMetadata.setPageMetadata(docfile, relative, outfile, hdf, Doclava.sTaglist);
-
if (fromTemplate.equals("sdk")) {
ClearPage.write(hdf, "sdkpage.cs", outfile);
} else {
diff --git a/src/com/google/doclava/Doclava.java b/src/com/google/doclava/Doclava.java
index f335e20..213492a 100644
--- a/src/com/google/doclava/Doclava.java
+++ b/src/com/google/doclava/Doclava.java
@@ -768,6 +768,11 @@
Data data = makeHDF();
String filename = templ.substring(0, len - 3) + htmlExtension;
DocFile.writePage(f.getAbsolutePath(), relative, filename, data);
+ String[] sections = relative.split("\\/");
+ boolean isIntl = ((sections.length > 0) && (sections[0].equals("intl")));
+ //if (!isIntl) {
+ PageMetadata.setPageMetadata(f, relative, filename, data, sTaglist);
+ //}
} else if(!f.getName().equals(".DS_Store")){
Data data = makeHDF();
String hdfValue = data.getValue("sac") == null ? "" : data.getValue("sac");
diff --git a/src/com/google/doclava/LinkReference.java b/src/com/google/doclava/LinkReference.java
index 816bdb1..dfece8e 100644
--- a/src/com/google/doclava/LinkReference.java
+++ b/src/com/google/doclava/LinkReference.java
@@ -59,7 +59,7 @@
public boolean good;
/**
- * regex pattern to use when matching explicit 'a href' reference text
+ * regex pattern to use when matching explicit "<a href" reference text
*/
private static final Pattern HREF_PATTERN =
Pattern.compile("^<a href=\"([^\"]*)\">([^<]*)</a>[ \n\r\t]*$", Pattern.CASE_INSENSITIVE);
diff --git a/src/com/google/doclava/PageMetadata.java b/src/com/google/doclava/PageMetadata.java
index 8825bb2..c22ac0f 100644
--- a/src/com/google/doclava/PageMetadata.java
+++ b/src/com/google/doclava/PageMetadata.java
@@ -16,8 +16,6 @@
package com.google.doclava;
-import java.io.*;
-import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -28,24 +26,6 @@
import com.google.clearsilver.jsilver.data.Data;
-import org.ccil.cowan.tagsoup.*;
-import org.xml.sax.XMLReader;
-import org.xml.sax.InputSource;
-import org.xml.sax.Attributes;
-import org.xml.sax.helpers.DefaultHandler;
-
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-import javax.xml.transform.dom.DOMResult;
-import javax.xml.transform.sax.SAXSource;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathFactory;
-
/**
* Metadata associated with a specific documentation page. Extracts
* metadata based on the page's declared hdf vars (meta.tags and others)
@@ -63,14 +43,6 @@
String mTagList;
static boolean sLowercaseTags = true;
static boolean sLowercaseKeywords = true;
- private static final boolean DBG = false;
-
- /**
- * regex pattern to match javadoc @link and similar tags. Extracts
- * root symbol to $1.
- */
- private static final Pattern JD_TAG_PATTERN =
- Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
public PageMetadata(File source, String dest, List<Node> taglist) {
mSource = source;
@@ -115,156 +87,32 @@
* are normalized. Unsupported metadata fields are ignored. See
* Node for supported metadata fields and methods for accessing values.
*
- * @param docfile The file from which to extract metadata.
+ * @param file The file from which to extract metadata.
* @param dest The output path for the file, used to set link to page.
* @param filename The file from which to extract metadata.
* @param hdf Data object in which to store the metadata values.
* @param tagList The file from which to extract metadata.
+ * @return tagList with new node added.
*/
- public static void setPageMetadata(String docfile, String dest, String filename,
+ public static List<Node> setPageMetadata(File file, String dest, String filename,
Data hdf, List<Node> tagList) {
//exclude this page if author does not want it included
boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
-
- //check whether summary and image exist and if not, get them from itemprop/markup
- Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
- Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
- if ((needsSummary) || (needsImage)) {
- //try to extract the metadata from itemprop and markup
- inferMetadata(docfile, hdf, needsSummary, needsImage);
- }
-
- //extract available metadata and set it in a node
if (!excludeNode) {
Node pageMeta = new Node.Builder().build();
pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
pageMeta.setTitleFriendly(hdf.getValue("page.titleFriendly",""));
- pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
- pageMeta.setLink(getPageUrlNormalized(filename));
+ pageMeta.setSummary(hdf.getValue("page.summary",""));
+ pageMeta.setLink(filename);
pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
- //use keywords as tags if no tags are available
- if (pageMeta.getTags() == null) {
- pageMeta.setTags(getPageTagsNormalized(hdf, "page.tags"));
- }
- pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
+ pageMeta.setImage(getStringValueNormalized(hdf, "page.image"));
pageMeta.setLang(getLangStringNormalized(filename));
pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
appendMetaNodeByType(pageMeta, tagList);
}
- }
-
- /**
- * Attempt to infer page metadata based on the contents of the
- * file. Load and parse the file as a dom tree. Select values
- * in this order: 1. dom node specifically tagged with
- * microdata (itemprop). 2. first qualitifed p or img node.
- *
- * @param docfile The file from which to extract metadata.
- * @param hdf Data object in which to store the metadata values.
- * @param needsSummary Whether to extract summary metadata.
- * @param needsImage Whether to extract image metadata.
- */
- public static void inferMetadata(String docfile, Data hdf,
- Boolean needsSummary, Boolean needsImage) {
- String sum = "";
- String imageUrl = "";
- String sumFrom = needsSummary ? "none" : "hdf";
- String imgFrom = needsImage ? "none" : "hdf";
- String filedata = hdf.getValue("commentText", "");
- if (DBG) System.out.println("----- " + docfile + "\n");
-
- try {
- XPathFactory xpathFac = XPathFactory.newInstance();
- XPath xpath = xpathFac.newXPath();
- InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
- XMLReader reader = new Parser();
- reader.setFeature(Parser.namespacesFeature, false);
- reader.setFeature(Parser.namespacePrefixesFeature, false);
- reader.setFeature(Parser.ignoreBogonsFeature, true);
-
- Transformer transformer = TransformerFactory.newInstance().newTransformer();
- DOMResult result = new DOMResult();
- transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
- org.w3c.dom.Node htmlNode = result.getNode();
-
- if (needsSummary) {
- StringBuilder sumStrings = new StringBuilder();
- XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
- + "[@itemprop='description'][1]//text()[string(.)]");
- org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
- XPathConstants.NODESET);
- if (nodes.getLength() > 0) {
- for (int i = 0; i < nodes.getLength(); i++) {
- String tx = nodes.item(i).getNodeValue();
- sumStrings.append(tx);
- sumFrom = "itemprop";
- }
- } else {
- XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
- + "@class='notice-developers') and not(../@class='sidebox')"
- + "and not(@class)]//text()");
- nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
- if (nodes.getLength() > 0) {
- for (int i = 0; i < nodes.getLength(); i++) {
- String tx = nodes.item(i).getNodeValue();
- sumStrings.append(tx + " ");
- sumFrom = "markup";
- }
- }
- }
- //found a summary string, now normalize it
- sum = sumStrings.toString().trim();
- if ((sum != null) && (!"".equals(sum))) {
- sum = getSummaryNormalized(sum);
- }
- //normalized summary ended up being too short to be meaningful
- if ("".equals(sum)) {
- if (DBG) System.out.println("Warning: description too short! (" + sum.length()
- + "chars) ...\n\n");
- }
- //summary looks good, store it to the file hdf data
- hdf.setValue("page.metaDescription", sum);
- }
- if (needsImage) {
- XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
- org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
- XPathConstants.NODESET);
- if (imgNodes.getLength() > 0) {
- imageUrl = imgNodes.item(0).getNodeValue();
- imgFrom = "itemprop";
- } else {
- XPathExpression FirstImgExpr = xpath.compile("//img/@src");
- imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
- if (imgNodes.getLength() > 0) {
- //iterate nodes looking for valid image url and normalize.
- for (int i = 0; i < imgNodes.getLength(); i++) {
- String tx = imgNodes.item(i).getNodeValue();
- //qualify and normalize the image
- imageUrl = getImageUrlNormalized(tx);
- //this img src did not qualify, keep looking...
- if ("".equals(imageUrl)) {
- if (DBG) System.out.println(" >>>>> Discarded image: " + tx);
- continue;
- } else {
- imgFrom = "markup";
- break;
- }
- }
- }
- }
- //img src url looks good, store it to the file hdf data
- hdf.setValue("page.image", imageUrl);
- }
- if (DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
- if (DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length() + " chars\n\n"
- + sum + "\n");
- return;
-
- } catch (Exception e) {
- if (DBG) System.out.println(" >>>>> Exception: " + e + "\n");
- }
+ return tagList;
}
/**
@@ -284,17 +132,14 @@
tagList = tagList.replaceAll("\"", "");
String[] tagParts = tagList.split(",");
for (int iter = 0; iter < tagParts.length; iter++) {
- tags.append("\"");
+ tags.append("'");
if (tag.equals("meta.tags") && sLowercaseTags) {
tagParts[iter] = tagParts[iter].toLowerCase();
} else if (tag.equals("page.tags") && sLowercaseKeywords) {
tagParts[iter] = tagParts[iter].toLowerCase();
}
- if (tag.equals("meta.tags")) {
- tags.append("#"); //to match hashtag format used with yt/blogger resources
- }
tags.append(tagParts[iter].trim());
- tags.append("\"");
+ tags.append("'");
if (iter < tagParts.length - 1) {
tags.append(",");
}
@@ -343,7 +188,7 @@
StringBuilder outTitle = new StringBuilder();
String title = hdf.getValue(tag, "");
if (!title.isEmpty()) {
- title = title.replaceAll("\"", "&quot;");
+ title = title.replaceAll("\"", "'");
if (title.indexOf("<span") != -1) {
String[] splitTitle = title.split("<span(.*?)</span>");
title = splitTitle[0];
@@ -381,86 +226,6 @@
}
/**
- * Normalize a page summary string and truncate as needed. Strings
- * exceeding max_chars are truncated at the first word boundary
- * following the max_size marker. Strings smaller than min_chars
- * are discarded (as they are assumed to be too little context).
- *
- * @param s String extracted from the page as it's summary.
- * @return A normalized string value.
- */
- public static String getSummaryNormalized(String s) {
- String str = "";
- int max_chars = 250;
- int min_chars = 50;
- int marker = 0;
- if (s.length() < min_chars) {
- return str;
- } else {
- str = s.replaceAll("^\"|\"$", "");
- str = str.replaceAll("\\s+", " ");
- str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
- str = str.replaceAll("\"", "&quot;");
- BreakIterator bi = BreakIterator.getWordInstance();
- bi.setText(str);
- if (str.length() > max_chars) {
- marker = bi.following(max_chars);
- } else {
- marker = bi.last();
- }
- str = str.substring(0, marker);
- str = str.concat("\u2026" );
- }
- return str;
- }
-
- //Disqualify img src urls that include these substrings
- public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
- "icon_play.png", "robot-tiny"};
-
- public static boolean inList(String s, String[] list) {
- for (String t : list) {
- if (s.contains(t)) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Extract and normalize an img src url.
- *
- * @param url Absolute or relative img src url.
- * @return Normalized absolute url if qualified, else empty
- */
- public static String getImageUrlNormalized(String url) {
- String DACROOT = "http://developer.android.com/";
- String absUrl = "";
- // validate to avoid choosing using specific images
- if ((url != null) && (!inList(url, IMAGE_EXCLUDE))) {
- absUrl = url.replace("{@docRoot}", DACROOT);
- absUrl = absUrl.replaceFirst("^/(?!/)", DACROOT);
- }
- return absUrl;
- }
-
- /**
- * Normalize a dac page url by making it absolute.
- *
- * @param url A page url
- * @return An absolute url reference
- */
- public static String getPageUrlNormalized(String url) {
- String DACROOT = "http://developer.android.com/";
- String absUrl = "";
- if (url !=null) {
- absUrl = url.replace("{@docRoot}", DACROOT);
- absUrl = absUrl.replaceFirst("^/(?!/)", DACROOT);
- }
- return absUrl;
- }
-
- /**
* Given a metadata node, add it as a child of a root node based on its
* type. If there is no root node that matches the node's type, create one
* and add the metadata node as a child node.
@@ -507,7 +272,6 @@
for (String t : nodeTags) { //process each of the meta.tags
for (Node n : rootTagNodesList) {
if (n.getLabel().equals(t.toString())) {
- n.getTags().add(String.valueOf(iter));
matched = true;
break; // add to the first root node only
} // tag did not match
@@ -619,16 +383,16 @@
final int n = list.size();
for (int i = 0; i < n; i++) {
buf.append("\n {\n");
- buf.append(" \"title\":\"" + list.get(i).mLabel + "\",\n" );
- buf.append(" \"titleFriendly\":\"" + list.get(i).mTitleFriendly + "\",\n" );
- buf.append(" \"summary\":\"" + list.get(i).mSummary + "\",\n" );
- buf.append(" \"url\":\"" + "http://developer.android.com/" + list.get(i).mLink + "\",\n" );
- buf.append(" \"group\":\"" + list.get(i).mGroup + "\",\n" );
+ buf.append(" title:\"" + list.get(i).mLabel + "\",\n" );
+ buf.append(" titleFriendly:\"" + list.get(i).mTitleFriendly + "\",\n" );
+ buf.append(" summary:\"" + list.get(i).mSummary + "\",\n" );
+ buf.append(" url:\"" + list.get(i).mLink + "\",\n" );
+ buf.append(" group:\"" + list.get(i).mGroup + "\",\n" );
list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
- buf.append(" \"image\":\"" + list.get(i).mImage + "\",\n" );
- buf.append(" \"lang\":\"" + list.get(i).mLang + "\",\n" );
- buf.append(" \"type\":\"" + list.get(i).mType + "\"");
+ buf.append(" image:\"" + list.get(i).mImage + "\",\n" );
+ buf.append(" lang:\"" + list.get(i).mLang + "\",\n" );
+ buf.append(" type:\"" + list.get(i).mType + "\"");
buf.append("\n }");
if (i != n - 1) {
buf.append(", ");
@@ -670,6 +434,7 @@
} else {
final int n = list.size();
for (int i = 0; i < n; i++) {
+
buf.append("\n " + list.get(i).mLabel + ":[");
renderArrayValue(buf, list.get(i).mTags);
buf.append("]");
@@ -687,7 +452,7 @@
* @param key The key for the pair.
*/
void renderArrayType(StringBuilder buf, List<String> type, String key) {
- buf.append(" \"" + key + "\": [");
+ buf.append(" " + key + ": [");
renderArrayValue(buf, type);
buf.append("],\n");
}
diff --git a/src/com/google/doclava/SampleCode.java b/src/com/google/doclava/SampleCode.java
index 57f1c54..45f9833 100644
--- a/src/com/google/doclava/SampleCode.java
+++ b/src/com/google/doclava/SampleCode.java
@@ -296,6 +296,8 @@
ClearPage.write(hdf, "sampleindex.cs", mDest + "index" + Doclava.htmlExtension);
} else {
DocFile.writePage(filename, rel, mDest + "index" + Doclava.htmlExtension, hdf);
+ PageMetadata.setPageMetadata(f, rel, mDest + "index" + Doclava.htmlExtension,
+ hdf, Doclava.sTaglist);
}
} else if (f.isFile()) {
//gather metadata for toc and jd_lists_unified