Revert "Infer document and sample metadata when generating docs."

This reverts commit 3cdd961719e41b26b129abbeefa644f09a8e449c.

Change-Id: I991368d53b29cbd308de5ffcfe4f7899b277ca64

commit: e9ab1b94fc0414fe72646b90b355562abf98d86e [log] [tgz]
author: Dirk Dougherty <ddougherty@google.com> Sat Feb 22 00:49:44 2014 +0000
committer: Dirk Dougherty <ddougherty@google.com> Sat Feb 22 00:49:44 2014 +0000
tree: 4055fb59b7f612f74aa8b278b462771d3f69e1d1
parent: 3cdd961719e41b26b129abbeefa644f09a8e449c [diff]
diff --git a/src/com/google/doclava/DocFile.java b/src/com/google/doclava/DocFile.java
index c777bd9..a00bac5 100644
--- a/src/com/google/doclava/DocFile.java
+++ b/src/com/google/doclava/DocFile.java

@@ -72,7 +72,7 @@
     }
     return outFrag;
   }
-
+  
   public static Data getPageMetadata (String docfile, Data hdf) {
     //utility method for extracting metadata without generating file output.
     if (hdf == null) {
@@ -230,9 +230,6 @@
       } else if (filename.indexOf("compatibility") == 0) {
         hdf.setValue("compatibility", "true");
       }
-      //set metadata for this file in jd_lists_unified
-      PageMetadata.setPageMetadata(docfile, relative, outfile, hdf, Doclava.sTaglist);
-
       if (fromTemplate.equals("sdk")) {
         ClearPage.write(hdf, "sdkpage.cs", outfile);
       } else {

diff --git a/src/com/google/doclava/Doclava.java b/src/com/google/doclava/Doclava.java
index f335e20..213492a 100644
--- a/src/com/google/doclava/Doclava.java
+++ b/src/com/google/doclava/Doclava.java

@@ -768,6 +768,11 @@
           Data data = makeHDF();
           String filename = templ.substring(0, len - 3) + htmlExtension;
           DocFile.writePage(f.getAbsolutePath(), relative, filename, data);
+          String[] sections = relative.split("\\/");
+          boolean isIntl = ((sections.length > 0) && (sections[0].equals("intl")));
+          //if (!isIntl) {
+          PageMetadata.setPageMetadata(f, relative, filename, data, sTaglist);
+          //}
         } else if(!f.getName().equals(".DS_Store")){
               Data data = makeHDF();
               String hdfValue = data.getValue("sac") == null ? "" : data.getValue("sac");

diff --git a/src/com/google/doclava/LinkReference.java b/src/com/google/doclava/LinkReference.java
index 816bdb1..dfece8e 100644
--- a/src/com/google/doclava/LinkReference.java
+++ b/src/com/google/doclava/LinkReference.java

@@ -59,7 +59,7 @@
   public boolean good;
 
   /**
-   * regex pattern to use when matching explicit 'a href' reference text
+   * regex pattern to use when matching explicit "<a href" reference text
    */
   private static final Pattern HREF_PATTERN =
       Pattern.compile("^<a href=\"([^\"]*)\">([^<]*)</a>[ \n\r\t]*$", Pattern.CASE_INSENSITIVE);

diff --git a/src/com/google/doclava/PageMetadata.java b/src/com/google/doclava/PageMetadata.java
index 8825bb2..c22ac0f 100644
--- a/src/com/google/doclava/PageMetadata.java
+++ b/src/com/google/doclava/PageMetadata.java

@@ -16,8 +16,6 @@
 
 package com.google.doclava;
 
-import java.io.*;
-import java.text.BreakIterator;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -28,24 +26,6 @@
 
 import com.google.clearsilver.jsilver.data.Data;
 
-import org.ccil.cowan.tagsoup.*;
-import org.xml.sax.XMLReader;
-import org.xml.sax.InputSource;
-import org.xml.sax.Attributes;
-import org.xml.sax.helpers.DefaultHandler;
-
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-import javax.xml.transform.dom.DOMResult;
-import javax.xml.transform.sax.SAXSource;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpression;
-import javax.xml.xpath.XPathFactory;
-
 /**
 * Metadata associated with a specific documentation page. Extracts
 * metadata based on the page's declared hdf vars (meta.tags and others)
@@ -63,14 +43,6 @@
   String mTagList;
   static boolean sLowercaseTags = true;
   static boolean sLowercaseKeywords = true;
-  private static final boolean DBG = false;
-
-  /**
-   * regex pattern to match javadoc @link and similar tags. Extracts
-   * root symbol to $1.
-   */
-  private static final Pattern JD_TAG_PATTERN =
-      Pattern.compile("\\{@.*?[\\s\\.\\#]([A-Za-z\\(\\)\\d_]+)(?=\u007D)\u007D");
 
   public PageMetadata(File source, String dest, List<Node> taglist) {
     mSource = source;
@@ -115,156 +87,32 @@
   * are normalized. Unsupported metadata fields are ignored. See
   * Node for supported metadata fields and methods for accessing values.
   *
-  * @param docfile The file from which to extract metadata.
+  * @param file The file from which to extract metadata.
   * @param dest The output path for the file, used to set link to page.
   * @param filename The file from which to extract metadata.
   * @param hdf Data object in which to store the metadata values.
   * @param tagList The file from which to extract metadata.
+  * @return tagList with new node added.
   */
-  public static void setPageMetadata(String docfile, String dest, String filename,
+  public static List<Node> setPageMetadata(File file, String dest, String filename,
       Data hdf, List<Node> tagList) {
     //exclude this page if author does not want it included
     boolean excludeNode = "true".equals(hdf.getValue("excludeFromSuggestions",""));
-
-    //check whether summary and image exist and if not, get them from itemprop/markup
-    Boolean needsSummary = "".equals(hdf.getValue("page.metaDescription", ""));
-    Boolean needsImage = "".equals(hdf.getValue("page.image", ""));
-    if ((needsSummary) || (needsImage)) {
-      //try to extract the metadata from itemprop and markup
-      inferMetadata(docfile, hdf, needsSummary, needsImage);
-    }
-
-    //extract available metadata and set it in a node
     if (!excludeNode) {
       Node pageMeta = new Node.Builder().build();
       pageMeta.setLabel(getTitleNormalized(hdf, "page.title"));
       pageMeta.setTitleFriendly(hdf.getValue("page.titleFriendly",""));
-      pageMeta.setSummary(hdf.getValue("page.metaDescription",""));
-      pageMeta.setLink(getPageUrlNormalized(filename));
+      pageMeta.setSummary(hdf.getValue("page.summary",""));
+      pageMeta.setLink(filename);
       pageMeta.setGroup(getStringValueNormalized(hdf,"sample.group"));
       pageMeta.setKeywords(getPageTagsNormalized(hdf, "page.tags"));
       pageMeta.setTags(getPageTagsNormalized(hdf, "meta.tags"));
-      //use keywords as tags if no tags are available
-      if (pageMeta.getTags() == null) {
-        pageMeta.setTags(getPageTagsNormalized(hdf, "page.tags"));
-      }
-      pageMeta.setImage(getImageUrlNormalized(hdf.getValue("page.image", "")));
+      pageMeta.setImage(getStringValueNormalized(hdf, "page.image"));
       pageMeta.setLang(getLangStringNormalized(filename));
       pageMeta.setType(getStringValueNormalized(hdf, "page.type"));
       appendMetaNodeByType(pageMeta, tagList);
     }
-  }
-
-  /**
-  * Attempt to infer page metadata based on the contents of the
-  * file. Load and parse the file as a dom tree. Select values
-  * in this order: 1. dom node specifically tagged with
-  * microdata (itemprop). 2. first qualitifed p or img node.
-  *
-  * @param docfile The file from which to extract metadata.
-  * @param hdf Data object in which to store the metadata values.
-  * @param needsSummary Whether to extract summary metadata.
-  * @param needsImage Whether to extract image metadata.
-  */
-  public static void inferMetadata(String docfile, Data hdf,
-      Boolean needsSummary, Boolean needsImage) {
-    String sum = "";
-    String imageUrl = "";
-    String sumFrom = needsSummary ? "none" : "hdf";
-    String imgFrom = needsImage ? "none" : "hdf";
-    String filedata = hdf.getValue("commentText", "");
-    if (DBG) System.out.println("----- " + docfile + "\n");
-
-    try {
-      XPathFactory xpathFac = XPathFactory.newInstance();
-      XPath xpath = xpathFac.newXPath();
-      InputStream inputStream = new ByteArrayInputStream(filedata.getBytes());
-      XMLReader reader = new Parser();
-      reader.setFeature(Parser.namespacesFeature, false);
-      reader.setFeature(Parser.namespacePrefixesFeature, false);
-      reader.setFeature(Parser.ignoreBogonsFeature, true);
-
-      Transformer transformer = TransformerFactory.newInstance().newTransformer();
-      DOMResult result = new DOMResult();
-      transformer.transform(new SAXSource(reader, new InputSource(inputStream)), result);
-      org.w3c.dom.Node htmlNode = result.getNode();
-
-      if (needsSummary) {
-        StringBuilder sumStrings = new StringBuilder();
-        XPathExpression ItempropDescExpr = xpath.compile("/descendant-or-self::*"
-            + "[@itemprop='description'][1]//text()[string(.)]");
-        org.w3c.dom.NodeList nodes = (org.w3c.dom.NodeList) ItempropDescExpr.evaluate(htmlNode,
-            XPathConstants.NODESET);
-        if (nodes.getLength() > 0) {
-          for (int i = 0; i < nodes.getLength(); i++) {
-            String tx = nodes.item(i).getNodeValue();
-            sumStrings.append(tx);
-            sumFrom = "itemprop";
-          }
-        } else {
-          XPathExpression FirstParaExpr = xpath.compile("//p[not(../../../"
-              + "@class='notice-developers') and not(../@class='sidebox')"
-              + "and not(@class)]//text()");
-          nodes = (org.w3c.dom.NodeList) FirstParaExpr.evaluate(htmlNode, XPathConstants.NODESET);
-          if (nodes.getLength() > 0) {
-            for (int i = 0; i < nodes.getLength(); i++) {
-              String tx = nodes.item(i).getNodeValue();
-              sumStrings.append(tx + " ");
-              sumFrom = "markup";
-            }
-          }
-        }
-        //found a summary string, now normalize it
-        sum = sumStrings.toString().trim();
-        if ((sum != null) && (!"".equals(sum))) {
-          sum = getSummaryNormalized(sum);
-        }
-        //normalized summary ended up being too short to be meaningful
-        if ("".equals(sum)) {
-           if (DBG) System.out.println("Warning: description too short! (" + sum.length()
-            + "chars) ...\n\n");
-        }
-        //summary looks good, store it to the file hdf data
-        hdf.setValue("page.metaDescription", sum);
-      }
-      if (needsImage) {
-        XPathExpression ItempropImageExpr = xpath.compile("//*[@itemprop='image']/@src");
-        org.w3c.dom.NodeList imgNodes = (org.w3c.dom.NodeList) ItempropImageExpr.evaluate(htmlNode,
-            XPathConstants.NODESET);
-        if (imgNodes.getLength() > 0) {
-          imageUrl = imgNodes.item(0).getNodeValue();
-          imgFrom = "itemprop";
-        } else {
-          XPathExpression FirstImgExpr = xpath.compile("//img/@src");
-          imgNodes = (org.w3c.dom.NodeList) FirstImgExpr.evaluate(htmlNode, XPathConstants.NODESET);
-          if (imgNodes.getLength() > 0) {
-            //iterate nodes looking for valid image url and normalize.
-            for (int i = 0; i < imgNodes.getLength(); i++) {
-              String tx = imgNodes.item(i).getNodeValue();
-              //qualify and normalize the image
-              imageUrl = getImageUrlNormalized(tx);
-              //this img src did not qualify, keep looking...
-              if ("".equals(imageUrl)) {
-                if (DBG) System.out.println("    >>>>> Discarded image: " + tx);
-                continue;
-              } else {
-                imgFrom = "markup";
-                break;
-              }
-            }
-          }
-        }
-        //img src url looks good, store it to the file hdf data
-        hdf.setValue("page.image", imageUrl);
-      }
-      if (DBG) System.out.println("Image (" + imgFrom + "): " + imageUrl);
-      if (DBG) System.out.println("Summary (" + sumFrom + "): " + sum.length() + " chars\n\n"
-          + sum + "\n");
-      return;
-
-    } catch (Exception e) {
-      if (DBG) System.out.println("    >>>>> Exception: " + e + "\n");
-    }
+    return tagList;
   }
 
   /**
@@ -284,17 +132,14 @@
       tagList = tagList.replaceAll("\"", "");
       String[] tagParts = tagList.split(",");
       for (int iter = 0; iter < tagParts.length; iter++) {
-        tags.append("\"");
+        tags.append("'");
         if (tag.equals("meta.tags") && sLowercaseTags) {
           tagParts[iter] = tagParts[iter].toLowerCase();
         } else if (tag.equals("page.tags") && sLowercaseKeywords) {
           tagParts[iter] = tagParts[iter].toLowerCase();
         }
-        if (tag.equals("meta.tags")) {
-          tags.append("#"); //to match hashtag format used with yt/blogger resources
-        }
         tags.append(tagParts[iter].trim());
-        tags.append("\"");
+        tags.append("'");
         if (iter < tagParts.length - 1) {
           tags.append(",");
         }
@@ -343,7 +188,7 @@
     StringBuilder outTitle =  new StringBuilder();
     String title = hdf.getValue(tag, "");
     if (!title.isEmpty()) {
-      title = title.replaceAll("\"", "&quot;");
+      title = title.replaceAll("\"", "'");
       if (title.indexOf("<span") != -1) {
         String[] splitTitle = title.split("<span(.*?)</span>");
         title = splitTitle[0];
@@ -381,86 +226,6 @@
   }
 
   /**
-  * Normalize a page summary string and truncate as needed. Strings
-  * exceeding max_chars are truncated at the first word boundary
-  * following the max_size marker. Strings smaller than min_chars
-  * are discarded (as they are assumed to be too little context).
-  *
-  * @param s String extracted from the page as it's summary.
-  * @return A normalized string value.
-  */
-  public static String getSummaryNormalized(String s) {
-    String str = "";
-    int max_chars = 250;
-    int min_chars = 50;
-    int marker = 0;
-    if (s.length() < min_chars) {
-      return str;
-    } else {
-      str = s.replaceAll("^\"|\"$", "");
-      str = str.replaceAll("\\s+", " ");
-      str = JD_TAG_PATTERN.matcher(str).replaceAll("$1");
-      str = str.replaceAll("\"", "&quot;");
-      BreakIterator bi = BreakIterator.getWordInstance();
-      bi.setText(str);
-      if (str.length() > max_chars) {
-        marker = bi.following(max_chars);
-      } else {
-        marker = bi.last();
-      }
-      str = str.substring(0, marker);
-      str = str.concat("\u2026" );
-    }
-    return str;
-  }
-
-  //Disqualify img src urls that include these substrings
-  public static String[] IMAGE_EXCLUDE = {"/triangle-", "favicon","android-logo",
-      "icon_play.png", "robot-tiny"};
-
-  public static boolean inList(String s, String[] list) {
-    for (String t : list) {
-      if (s.contains(t)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-  * Extract and normalize an img src url.
-  *
-  * @param url Absolute or relative img src url.
-  * @return Normalized absolute url if qualified, else empty
-  */
-  public static String getImageUrlNormalized(String url) {
-    String DACROOT = "http://developer.android.com/";
-    String absUrl = "";
-    // validate to avoid choosing using specific images
-    if ((url != null) && (!inList(url, IMAGE_EXCLUDE))) {
-      absUrl = url.replace("{@docRoot}", DACROOT);
-      absUrl = absUrl.replaceFirst("^/(?!/)", DACROOT);
-    }
-    return absUrl;
-  }
-
-  /**
-  * Normalize a dac page url by making it absolute.
-  *
-  * @param url A page url
-  * @return An absolute url reference
-  */
-  public static String getPageUrlNormalized(String url) {
-    String DACROOT = "http://developer.android.com/";
-    String absUrl = "";
-    if (url !=null) {
-      absUrl = url.replace("{@docRoot}", DACROOT);
-      absUrl = absUrl.replaceFirst("^/(?!/)", DACROOT);
-    }
-    return absUrl;
-  }
-
-  /**
   * Given a metadata node, add it as a child of a root node based on its
   * type. If there is no root node that matches the node's type, create one
   * and add the metadata node as a child node.
@@ -507,7 +272,6 @@
         for (String t : nodeTags) { //process each of the meta.tags
           for (Node n : rootTagNodesList) {
             if (n.getLabel().equals(t.toString())) {
-              n.getTags().add(String.valueOf(iter));
               matched = true;
               break; // add to the first root node only
             } // tag did not match
@@ -619,16 +383,16 @@
         final int n = list.size();
         for (int i = 0; i < n; i++) {
           buf.append("\n      {\n");
-          buf.append("        \"title\":\"" + list.get(i).mLabel + "\",\n" );
-          buf.append("        \"titleFriendly\":\"" + list.get(i).mTitleFriendly + "\",\n" );
-          buf.append("        \"summary\":\"" + list.get(i).mSummary + "\",\n" );
-          buf.append("        \"url\":\"" + "http://developer.android.com/" + list.get(i).mLink + "\",\n" );
-          buf.append("        \"group\":\"" + list.get(i).mGroup + "\",\n" );
+          buf.append("        title:\"" + list.get(i).mLabel + "\",\n" );
+          buf.append("        titleFriendly:\"" + list.get(i).mTitleFriendly + "\",\n" );
+          buf.append("        summary:\"" + list.get(i).mSummary + "\",\n" );
+          buf.append("        url:\"" + list.get(i).mLink + "\",\n" );
+          buf.append("        group:\"" + list.get(i).mGroup + "\",\n" );
           list.get(i).renderArrayType(buf, list.get(i).mKeywords, "keywords");
           list.get(i).renderArrayType(buf, list.get(i).mTags, "tags");
-          buf.append("        \"image\":\"" + list.get(i).mImage + "\",\n" );
-          buf.append("        \"lang\":\"" + list.get(i).mLang + "\",\n" );
-          buf.append("        \"type\":\"" + list.get(i).mType + "\"");
+          buf.append("        image:\"" + list.get(i).mImage + "\",\n" );
+          buf.append("        lang:\"" + list.get(i).mLang + "\",\n" );
+          buf.append("        type:\"" + list.get(i).mType + "\"");
           buf.append("\n      }");
           if (i != n - 1) {
             buf.append(", ");
@@ -670,6 +434,7 @@
       } else {
         final int n = list.size();
         for (int i = 0; i < n; i++) {
+
           buf.append("\n    " + list.get(i).mLabel + ":[");
           renderArrayValue(buf, list.get(i).mTags);
           buf.append("]");
@@ -687,7 +452,7 @@
     * @param key The key for the pair.
     */
     void renderArrayType(StringBuilder buf, List<String> type, String key) {
-      buf.append("        \"" + key + "\": [");
+      buf.append("        " + key + ": [");
       renderArrayValue(buf, type);
       buf.append("],\n");
     }

diff --git a/src/com/google/doclava/SampleCode.java b/src/com/google/doclava/SampleCode.java
index 57f1c54..45f9833 100644
--- a/src/com/google/doclava/SampleCode.java
+++ b/src/com/google/doclava/SampleCode.java

@@ -296,6 +296,8 @@
         ClearPage.write(hdf, "sampleindex.cs", mDest + "index" + Doclava.htmlExtension);
       } else {
         DocFile.writePage(filename, rel, mDest + "index" + Doclava.htmlExtension, hdf);
+        PageMetadata.setPageMetadata(f, rel, mDest + "index" + Doclava.htmlExtension,
+            hdf, Doclava.sTaglist);
       }
     } else if (f.isFile()) {
       //gather metadata for toc and jd_lists_unified
commit	e9ab1b94fc0414fe72646b90b355562abf98d86e	[log] [tgz]
author	Dirk Dougherty <ddougherty@google.com>	Sat Feb 22 00:49:44 2014 +0000
committer	Dirk Dougherty <ddougherty@google.com>	Sat Feb 22 00:49:44 2014 +0000
tree	4055fb59b7f612f74aa8b278b462771d3f69e1d1
parent	3cdd961719e41b26b129abbeefa644f09a8e449c [diff]